001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005:
006: package com.sun.portal.search.rdmserver;
007:
008: import com.sun.portal.search.soif.*;
009: import com.sun.portal.search.rdm.*;
010: import com.sun.portal.search.rdmgr.*;
011: import com.sun.portal.search.util.*;
012: import com.sun.portal.search.db.*;
013: import com.sun.portal.log.common.PortalLogger;
014:
015: import java.util.*;
016: import java.util.logging.Logger;
017: import java.util.logging.Level;
018: import java.io.*;
019: import java.text.*;
020:
021: /*
022: * Support for RDM database requests
023: */
024: public class DatabaseService extends RDMService {
025:
026: static final int MAX_RD_BATCH = 65520;
027:
028: DbAccess dbaccess;
029: int curDbConnections;
030: int maxDbConnections = 6; // XXX
031: int indexBatchSize = MAX_RD_BATCH; // XXX dup in rdmgr
032: RDSubmit rdsubmit = null;
033:
034: // supported query languages
035: public static final String QL_URL = "url";
036: public static final String QL_SEARCH = "search";
037: public static final String QL_GATHERER = "gatherer";
038: public static final String QL_SUBTREE = "subtree";
039:
040: public static final String QL_LEGACY_COMPASS = "compass"; // alias for search
041: public static final String QL_LEGACY_NSIR = "nsir"; // alias for search
042: public static final String QL_LEGACY_VERITY = "verity"; // alias for search
043: public static final String QL_LEGACY_VERITY_QL = "verity-ql"; // alias for search
044:
/** Parsed form of a "url" query. */
class QLURLQuery {
    /** URL of the RD to fetch; URLScopeParse copies the request scope here. */
    String url;
}
048:
/** Parsed form of a search-style query (search, gatherer and subtree). */
class QLSearchQuery {
    /** Scope as supplied in the request. */
    String scope;
    /** Translated scope; set only if scope translation occurred. */
    String newScope;
    /** True when the request asked for highlighting. */
    boolean highlight;
    /** Highlight tags taken from the request, or null. */
    String[] highlightTags;
    /** Not assigned anywhere in this class. */
    String searchCategory;
}
056:
057: public DatabaseService() {
058:
059: // set up max connections
060: String p = SearchConfig.getValue(SearchConfig.DBMAX);
061: if (p != null) {
062: int cnt = Integer.parseInt(p);
063: if (cnt > 0)
064: maxDbConnections = cnt;
065: }
066: SearchLogger.getLogger().log(Level.INFO, "PSSH_CSPSRDMS0001",
067: new Object[] { new Integer(maxDbConnections) });
068:
069: // set the index batch size
070: if ((p = SearchConfig.getValue(SearchConfig.MAX_INDEX_BATCH)) != null)
071: indexBatchSize = Integer.parseInt(p);
072: if (indexBatchSize < 1 || indexBatchSize > MAX_RD_BATCH) {
073: SearchLogger.getLogger().log(
074: Level.WARNING,
075: "PSSH_CSPSRDMS0002",
076: new Object[] {
077: new Integer(SearchConfig.MAX_INDEX_BATCH),
078: new Integer(MAX_RD_BATCH) });
079: indexBatchSize = MAX_RD_BATCH;
080: }
081:
082: // rd retrieval
083: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
084: null));
085: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
086: QL_SEARCH));
087: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
088: QL_GATHERER));
089: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
090: QL_URL));
091: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
092: QL_SUBTREE));
093:
094: // rd submission
095: supportedServices.add(new RDMServiceDescriptor(
096: RDM.RDM_RD_SUBMIT_REQ, null));
097:
098: // legacy - aliases
099: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
100: QL_LEGACY_COMPASS));
101: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
102: QL_LEGACY_NSIR));
103: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
104: QL_LEGACY_VERITY));
105: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ,
106: QL_LEGACY_VERITY_QL));
107:
108: dbaccess = DbAccess.getInstance();
109:
110: }
111:
112: /**
113: * RDM-QL=URL Service Function
114: *
115: * Takes the incoming RDM Request message, and returns the RDMResponse.
116: */
117: public void service(RDMRequest req, RDMResponse res)
118: throws Exception {
119:
120: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0003");
121:
122: normalizeRequest(req);
123:
124: // What request type?
125: String reqType = req.getHeader().getType();
126:
127: if (reqType.equalsIgnoreCase(RDM.RDM_RD_REQ)) {
128:
129: // RD retrieval requests - query language is required...
130:
131: // Set outgoing response type
132: res.getHeader().setType(RDM.RDM_RD_RES);
133:
134: // Dummy result in case of error, for clients that don't handle errors cleanly
135: res.getHeader().setResponseInterpret(
136: "0 results out of 0 hits across 0 documents");
137:
138: // Do we actually have a query?
139: if (req.getQuery() == null) {
140: res.getHeader().setErrorMessage("Bad or missing query");
141: return;
142: }
143:
144: String ql = req.getHeader().getQueryLanguage();
145: if (ql == null) {
146: res.getHeader().setErrorMessage(
147: "Bad or missing query language");
148: return;
149: }
150:
151: if (ql.equalsIgnoreCase(QL_SEARCH))
152: ql_search_service(req, res);
153: else if (ql.equalsIgnoreCase(QL_URL))
154: ql_url_service(req, res);
155: else if (ql.equalsIgnoreCase(QL_GATHERER))
156: ql_gatherer_service(req, res);
157: else if (ql.equalsIgnoreCase(QL_SUBTREE))
158: ql_subtree_service(req, res);
159:
160: // Try legacy QLs
161: else if (ql.equalsIgnoreCase(QL_LEGACY_NSIR)
162: || ql.equalsIgnoreCase(QL_LEGACY_COMPASS)
163: || ql.equalsIgnoreCase(QL_LEGACY_VERITY)
164: || ql.equalsIgnoreCase(QL_LEGACY_VERITY_QL)) {
165: req.getHeader().setQueryLanguage(QL_SEARCH);
166: ql_search_service(req, res);
167: }
168:
169: } else if (reqType.equalsIgnoreCase(RDM.RDM_RD_SUBMIT_REQ)) {
170: // RD submit request
171: // Set outgoing response type
172: res.getHeader().setType(RDM.RDM_RD_SUBMIT_RES);
173: rdsubmit_service(req, res);
174: } else {
175: // shouldn't happen
176: res.getHeader().setErrorMessage(
177: "Bad or missing request type");
178: return;
179: }
180:
181: }
182:
183: // Restart is called in a single-threaded mode
184: public void shutdown() throws Exception {
185: dbaccess.shutdown(null);
186: }
187:
188: public void normalizeRequest(RDMRequest req) {
189: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0004");
190: RDMQuery qry = req.getQuery();
191: // Set database name if needed
192: if (qry != null && qry.getDatabase() == null)
193: qry.setDatabase(SearchConfig.getValue(SearchConfig.DBNAME));
194: }
195:
196: /**
197: * RDM-QL=URL Service Function
198: *
199: * Takes the incoming RDM Request message, and returns the RDMResponse.
200: */
201: public void ql_url_service(RDMRequest req, RDMResponse res)
202: throws Exception {
203:
204: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0005");
205:
206: // Validate/parse Scope specification
207: QLURLQuery qry = new QLURLQuery();
208: String scope = req.getQuery().getScope();
209: if (scope == null || scope.length() == 0
210: || !URLScopeParse(scope, qry)) {
211: res.getHeader().setErrorMessage("Invalid Scope");
212: res.getHeader().setResponseInterpret(
213: "0 results out of 0 hits across 0 documents");
214: return;
215: }
216:
217: // Retrieve the results
218: getURLResults(req, res, qry);
219:
220: }
221:
222: /**
223: * RDM-QL=URL Service Function
224: *
225: * Takes the incoming RDM Request message, and returns the RDMResponse.
226: */
227: public void ql_gatherer_service(RDMRequest req, RDMResponse res)
228: throws Exception {
229:
230: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0006");
231:
232: // Validate/parse Scope specification
233: QLSearchQuery qry = new QLSearchQuery();
234: String scope = req.getQuery().getScope();
235: if (scope == null || scope.length() == 0
236: || !gathererScopeParse(scope, qry)) {
237: res.getHeader().setErrorMessage("Invalid Scope");
238: res.getHeader().setResponseInterpret(
239: "0 results out of 0 hits across 0 documents");
240: return;
241: }
242: // set ViewHits to unbounded viewhits=null
243: if (req.getQuery().getViewHits() == null)
244: req.getQuery().setViewHits("1..-1");
245:
246: // Retrieve the results
247: getSearchResults(req, res, qry);
248:
249: }
250:
251: /**
252: * RDM-QL=SEARCH Service Function
253: *
254: * Takes the incoming RDM Request message, and returns the RDMResponse.
255: */
256: public void ql_search_service(RDMRequest req, RDMResponse res)
257: throws Exception {
258:
259: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0007");
260:
261: // Validate/parse Scope specification
262: QLSearchQuery qry = new QLSearchQuery();
263:
264: // Scope
265: String scope = req.getQuery().getScope();
266: if (scope == null || scope.length() == 0
267: || !searchScopeParse(scope, qry)) {
268: res.getHeader().setErrorMessage("Invalid Scope");
269: res.getHeader().setResponseInterpret(
270: "0 results out of 0 hits across 0 documents");
271: return;
272: }
273:
274: // Search category
275: String searchCategory = req.getQuery().getSearchCategory();
276: if (searchCategory != null
277: && searchCategory.length() > 0
278: && !searchCategory
279: .equalsIgnoreCase(RDMTaxonomy.RDM_TAXONOMY_ROOT))
280: qry.newScope = "(classification <STARTS> " + searchCategory
281: + ") <AND> (" + qry.scope + ")";
282:
283: // Set up highlight info
284: String p = req.getQuery().getHighlight();
285: if (p != null
286: && (p.equalsIgnoreCase("true") || p
287: .equalsIgnoreCase("1")))
288: qry.highlight = true;
289:
290: // highlight tags
291: String hlt = req.getQuery().getHighlightTags();
292: if (hlt != null) {
293: String[] tags = String2Array.string2Array(hlt, ',');
294: if (tags != null && (tags.length == 6 || tags.length == 12))
295: qry.highlightTags = tags;
296: }
297:
298: // Retrieve the results
299: getSearchResults(req, res, qry);
300:
301: }
302:
303: /**
304: * RDM-QL=Taxonomy-Basic Service Function
305: * Takes the incoming RDM Request message, and returns the RDMResponse.
306: */
307: public void ql_subtree_service(RDMRequest req, RDMResponse res)
308: throws Exception {
309:
310: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0008");
311:
312: // Validate Scope specification (translates to search scope)
313: QLSearchQuery qry = new QLSearchQuery();
314: String scope = req.getQuery().getScope();
315: if (scope == null || scope.length() == 0
316: || !subtreeScopeParse(scope, qry)) {
317: res.getHeader().setErrorMessage("Invalid Scope");
318: res.getHeader().setResponseInterpret(
319: "0 results out of 0 hits across 0 documents");
320: return;
321: }
322:
323: // get the results...
324: getSearchResults(req, res, qry);
325: }
326:
327: /**
328: * RD_SUBMIT_REQ Service Function
329: * Takes the incoming RDM Request message, and returns the RDMResponse.
330: */
331: public void rdsubmit_service(RDMRequest req, RDMResponse res)
332: throws Exception {
333: // XXX need to support bg update and bg indexing (rdmgr daemon?)
334: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0009");
335:
336: RDMView view = null; // XXX there is no query... new RDMView(req);
337: RDMDb db = null;
338: SToken st = req.getSToken();
339:
340: int nResults = 0;
341:
342: // XXX hack - we'll add database to rdmheader for now - but we need to
343: // pull it from rdsubmit request header per request (in the submit request loop)
344: String dbname = req.getHeader().getSOIF().getValue(
345: RDM.SUBMIT_DB);
346: if (dbname == null)
347: dbname = SearchConfig.getValue(SearchConfig.DBNAME); // submit to default db if none specified
348:
349: try {
350: // Get a database connection
351: try {
352: db = dbaccess.writeStart(st, dbname);
353: } catch (Exception e) {
354: SearchLogger.getLogger().log(Level.WARNING,
355: "PSSH_CSPSRDMS0010", e);
356: throw new Exception("Cannot access database.");
357: }
358:
359: res.sendHeader();
360:
361: // submit rds
362: RDSubmitRequest submitReq = new RDSubmitRequest();
363: submitReq.sis = req.getInputStream();
364: submitReq.sos = res.getOutputStream();
365: submitReq.db = db;
366: submitReq.max_rd_batch = indexBatchSize;
367:
368: nResults = new RDSubmit().process_input(st, submitReq, req
369: .getTransaction());
370:
371: res.getHeader().setResponseInterpret(nResults + " results");
372:
373: // send out the result header (at end of stream XXX)
374: //res.sendHeader();
375: } finally {
376: if (db != null)
377: dbaccess.writeFinish(st, db);
378: }
379:
380: // Log message - written to disk later
381: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0011",
382: new Integer(nResults));
383: req.logRDM("db=" + dbname + " processed=" + nResults);
384:
385: }
386:
387: /**
388: * Subtree scope - "descendant classification[\r]\nquery"
389: */
390: protected boolean subtreeScopeParse(String scope, QLSearchQuery qry) {
391:
392: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0012",
393: scope);
394:
395: if (!scope.regionMatches(true, 0, "descendant", 0, 10))
396: return false;
397:
398: int p = scope.indexOf('\n');
399: if (p == -1)
400: return false;
401:
402: String classification = scope.substring(10, p).trim();
403: if (classification.length() == 0)
404: return false;
405: String query = scope.substring(++p).trim();
406:
407: // Translate the query to search within the classification.
408: if (query.length() == 0)
409: qry.newScope = "(classification <STARTS> "
410: + Encoder.quotedEscape(classification, true) + ")";
411: else if (classification
412: .equalsIgnoreCase(RDMTaxonomy.RDM_TAXONOMY_ROOT))
413: qry.newScope = query;
414: else
415: qry.newScope = "(classification <STARTS> "
416: + Encoder.quotedEscape(classification, true)
417: + ") <AND> (" + query + ")";
418:
419: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0013",
420: qry.newScope);
421: return true;
422: }
423:
424: protected boolean URLScopeParse(String scope, QLURLQuery qry) {
425: if (scope != null) {
426: qry.url = scope;
427: return true;
428: }
429: return false;
430: }
431:
432: protected boolean gathererScopeParse(String scope, QLSearchQuery qry) {
433: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0014",
434: scope);
435: qry.scope = scope;
436: if (scope.equalsIgnoreCase("all")) {
437: qry.newScope = "*";
438: return true;
439: } else if (scope.regionMatches(true, 0, "since", 0, 5)) {
440: // Transform to search query
441: qry.newScope = "rd-last-modified > \""
442: + scope.substring(5).trim() + "\"";
443: return true;
444: }
445: return false;
446: }
447:
448: protected boolean searchScopeParse(String scope, QLSearchQuery qry) {
449: if (scope != null) {
450: qry.scope = scope;
451: return true;
452: }
453: return false;
454: }
455:
456: protected void getURLResults(RDMRequest req, RDMResponse res,
457: QLURLQuery qry) throws Exception {
458:
459: RDMView view = new RDMView(req);
460: RDMDb db = null;
461: SToken st = req.getSToken();
462:
463: int nResults = 0;
464: SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSRDMS0015",
465: qry.url);
466: try {
467: // Get a database connection
468: try {
469: db = dbaccess.readStart(st, req.getQuery()
470: .getDatabase());
471: } catch (Exception e) {
472: SearchLogger.getLogger().log(Level.WARNING,
473: "PSSH_CSPSRDMS0016", e);
474: throw new Exception("Cannot access database.");
475: }
476:
477: // Fetch database RD by URL
478: SOIF s = db.fetch(st, qry.url, view.attr, 0, null);
479: if (s != null)
480: nResults = 1;
481:
482: res.getHeader().setResponseInterpret(nResults + " results");
483:
484: // send out the results
485: res.sendHeader();
486: if (s != null)
487: res.getOutputStream().write(s);
488: } catch (Exception e) {
489: // XXX Don't want to catch exceptions here, but for now we'll return
490: // an empty result set because the taglib can't handle any sort of error
491: res.getHeader().setResponseInterpret("0 results");
492: throw e;
493: } finally {
494: if (db != null)
495: dbaccess.readFinish(st, db);
496: }
497:
498: // Log message - written to disk later
499: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0017",
500: new Integer(nResults));
501: req.logRDM("db=" + req.getQuery().getDatabase() + " hits="
502: + nResults + " url=\"" + qry.url + "\"");
503:
504: }
505:
506: /**
507: * Outputs the matching RDs for the given search query.
508: */
509: protected void getSearchResults(RDMRequest req, RDMResponse res,
510: QLSearchQuery qry) throws Exception {
511:
512: RDMDb db = null;
513:
514: long nHits = 0;
515: long nDocs = 0;
516: int nResults = 0;
517:
518: SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSRDMS0018",
519: qry.scope);
520:
521: SToken st = req.getSToken();
522: RDMTransaction t = req.getTransaction();
523:
524: try {
525:
526: // Get a database connection
527: try {
528: db = dbaccess.readStart(st, req.getQuery()
529: .getDatabase());
530: } catch (Exception e) {
531: SearchLogger.getLogger().log(Level.WARNING,
532: "PSSH_CSPSRDMS0019", e);
533: throw new Exception("Cannot access database.");
534: }
535:
536: int flags = 0;
537:
538: String search_scope = qry.newScope; // newScope is set if the query was xlated
539: if (search_scope == null)
540: search_scope = qry.scope;
541:
542: RDMView view = new RDMView(req);
543: RDMResultSet rs = db.search(st, search_scope,
544: view.hits.max, view.attr, view.order, t);
545:
546: nDocs = rs.getDocCount();
547: nHits = rs.getHitCount();
548:
549: int intHits = (int) Math.min(nHits, Integer.MAX_VALUE);
550: int minHit = view.hits.min == 0 ? 0 : Math.min(
551: view.hits.min - 1, intHits);
552: int maxHit = Math.min(view.hits.max, intHits);
553: int nReturned = maxHit - minHit;
554:
555: // send the response header
556: res.getHeader().setResponseInterpret(
557: nReturned + " results out of " + nHits
558: + " hits across " + nDocs + " documents");
559: res.sendHeader();
560:
561: // send the results
562: for (int i = minHit; i < maxHit; ++i) {
563: SOIF s = null;
564: try {
565: s = rs.getResult(i, view.attr, qry.highlightTags);
566: } catch (Exception e) {
567: SearchLogger.getLogger().log(Level.WARNING,
568: "PSSH_CSPSRDMS0138", e);
569: }
570: if (s == null) {
571: // already logged by the result set class? (all of them?)
572: SearchLogger.getLogger().log(Level.INFO,
573: "PSSH_CSPSRDMS0139", new Integer(i));
574: continue;
575: }
576: res.getOutputStream().write(s);
577: ++nResults;
578: }
579:
580: } finally {
581: if (db != null)
582: dbaccess.readFinish(st, db);
583: }
584:
585: // Log message
586: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0020",
587: new Integer(nResults));
588: req.logRDM("db="
589: + req.getQuery().getDatabase()
590: + " hits="
591: + nResults
592: + "/"
593: + nHits
594: + "/"
595: + nDocs
596: + " scope=\""
597: + Encoder.quotedEscape(((qry.scope != null) ? qry.scope
598: : qry.newScope)) + "\"");
599:
600: }
601:
602: }
|