/*
 * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
 * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
 */

package com.sun.portal.search.db;

import com.sun.portal.search.rdm.*;
import com.sun.portal.search.soif.*;
import com.sun.portal.search.util.*;
import com.sun.portal.log.common.PortalLogger;

import java.util.*;
import java.util.logging.Logger;
import java.util.logging.Level;
import java.io.*;

// XXX this is not finished - want to move taxonomy to db
// (preferably with a stock standard db interface)

/**
 * Support for RDM Taxonomy requests
 *
 * <pre>RDM-Query-Language=Taxonomy-Basic
 *
 * Scope = descendant Classification | descendant/N Classification |
 *         children Classification | anklebiter Classification |
 *         advise-doc-count/N Classification
 *
 * where
 * Classification = ROOT | Classification-Id
 *
 * Example RDM messages
 * --------------------
 *
 * To send the entire taxonomy (node and everything below):
 *
 * @RDMHEADER { -
 * RDM-Version{x}: 1.0
 * RDM-Type{x}: taxonomy-description-request
 * RDM-Query-Language{x}: taxonomy-basic
 * }
 *
 * @RDMQUERY { -
 * Scope{x}: descendant ROOT
 * }
 *
 * To send only the children (i.e., the node and the level directly below it):
 *
 * @RDMHEADER { -
 * RDM-Version{x}: 1.0
 * RDM-Type{x}: taxonomy-description-request
 * RDM-Query-Language{x}: taxonomy-basic
 * }
 *
 * @RDMQUERY { -
 * Scope{x}: anklebiter ROOT
 * }
 *
 * OR
 *
 * @RDMHEADER { -
 * RDM-Version{x}: 1.0
 * RDM-Type{x}: taxonomy-description-request
 * RDM-Query-Language{x}: taxonomy-basic
 * }
 *
 * @RDMQUERY { -
 * Scope{x}: anklebiter Education/Training:Internal Training Classes
 * }
 *
 * Added in compass 302...
 *
 * To advise of classified doc counts, e.g., when the db stats don't match Verity:
 *
 * @RDMHEADER { -
 * RDM-Version{x}: 1.0
 * RDM-Type{x}: taxonomy-description-request
 * RDM-Query-Language{x}: taxonomy-basic
 * }
 *
 * @RDMQUERY { -
 * Scope{x}: advise-doc-count/N Internal Engineering
 * }
 *
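 * Minimal caller sketch (hypothetical - in the product the RDM server layer
 * performs the open/dispatch/close; the SToken "st" and the db paths below
 * are placeholders, see open() for the real parameter meanings):
 *
 * TaxonomyDb tdb = new TaxonomyDb();
 * tdb.open(st, "/path/to/db/home", "default", SOIFDb.READER, 0644);
 * // a taxonomy-description-request such as the RDM messages above is then
 * // dispatched to getTaxBasicResults(), which streams the matching
 * // category SOIFs back to the client
 * tdb.close(st);
 *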
 * </pre>
 */
public class TaxonomyDb extends SearchOnlyDb {

    // XXX This will extend IndexedSOIFDb once tax is a real DB

    // classification stats support
    public static final String CLASS_KEY = "statistics:classification";
    public static final String CLASS_NAME = "s_class";
    public static final String CLASS_NODES = "s_nodes";
    public static final String CLASS_THIS = "s_nodecnt";
    public static final String CLASS_IDXBATCH = "s_idxbatch";

    // Underlying search engine database - open() and close() delegate to it
    NovaDb searchengine = new NovaDb();

    // Cached parsed taxonomy and its refresh bookkeeping
    RDMTaxonomy theTax = null;                 // in-memory parsed taxonomy
    static Date theTax_lmt = null;             // taxonomy file last-modified time at load
    static Date theTax_refresh = null;         // next scheduled taxonomy reload
    static int theTax_refresh_rate = (60 * 5); // reload interval in seconds - default: 5 minutes

    // Supported query languages
    public static final String QL_TAX_BASIC = "taxonomy-basic";
    public static final String QL_SEARCH = "search";
    public static final String QL_CLASSIFICATION = "classification";

    /** Taxonomy processing support */
    class QLTaxQuery {
        String class_id;          // target classification id, or ROOT
        int depth;                // traversal depth below the scope node; -1 = unlimited
        boolean retrieve_rd;      // return RDs rather than category SOIFs (not yet supported)
        boolean advise_doc_count; // this is an advise-doc-count request
        int doc_count;            // advised classified document count
    }

    /** Taxonomy processing support */
    class QLSearchQuery {
        String scope;
    }

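    /**
     * Handles a taxonomy-basic query: resolves the scope classification,
     * then either processes doc-count advice (via checkTaxonomy) or streams
     * the matching category SOIFs back, subject to the view attributes and
     * the hit and depth limits.
     */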
    protected void getTaxBasicResults(RDMRequest req, RDMResponse res,
            QLTaxQuery qry) throws Exception {

        SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSB0066",
                req.getQuery().getScope());

        RDMTaxonomy tp = theTax; // use already-parsed Taxonomy

        // Find the interesting node
        RDMClassification cp = null;
        if ((cp = tp.find(qry.class_id)) == null) {
            // log - Classification Mismatch: {0}
            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0067",
                    qry.class_id);
            res.getHeader().setResponseInterpret("0 results");
            return;
        }

        if (qry.advise_doc_count) {
            /*
             * Handle classified document count advice
             * NB: This processes and logs the request and sets the result hdr
             */
            checkTaxonomy(req, res, qry, cp);
            return;
        }

        // XXX This is where we would return either RDs or Categories depending
        // on the requested rdm type, but we only support cats at the moment.

        // Taxonomy description request handling
        RDMView view = new RDMView(req);
        // XXX special case for taxonomy bwd compat
        // - if view-hits not given, default to all (instead of 10, etc)
        if (req.getQuery().getViewHits() == null)
            view.hits.max = RDMViewHits.RDMVIEWHITS_MAX;

        // XXX Temporary results buffer so we can count the results before
        // sending them (XXX - not for RDs)
        SOIFBuffer sb = new SOIFBuffer();
        SOIFOutputStream ss = new SOIFOutputStream(sb);

        // Print leading Taxonomy information
        if (!qry.retrieve_rd)
            ss.write(tp.getSOIF());

        // Set View-Attributes if available
        if (view != null && view.attr != null) {
            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0068",
                    view.attr);
            ss.setAllowed(view.attr);
        }

        // Traverse the Taxonomy and save the results
        QLTaxTraverse tt = new QLTaxTraverse();
        tt.t = tp;
        tt.qry = qry;
        tt.n = 0;
        tt.total = 0;
        tt.max = view.hits.max;
        tt.depth_start = tp.depth(cp);
        if (qry.depth == -1)
            tt.depth_max = -1; // unlimited
        else
            tt.depth_max = qry.depth + tt.depth_start;
        tt.ss = ss;
        SearchLogger.getLogger().log(
                Level.FINER,
                "PSSH_CSPSB0078",
                new Object[] { Integer.toString(tt.depth_start),
                        Integer.toString(tt.depth_max) });
        cp.apply(RDM.RDM_TAX_INORDER, tt);

        // Header for taxonomy description results
        res.getHeader().setResponseInterpret(
                tt.n + " results out of " + tt.n + " hits across "
                        + tt.total + " documents");

        // Log message - written to disk later
        req.logRDM("xfer=" + tt.n + " scope=\""
                + req.getQuery().getScope() + "\"");

        // send out the results
        res.sendHeader();
        res.getOutputStream().write(sb.toByteArray());

    }

    /**
     * Handles advisory corrections to the classified document count stats.
     * (the stats are kept in a rather unreliable way)
     */
    protected void checkTaxonomy(RDMRequest req, RDMResponse res,
            QLTaxQuery qry, RDMClassification cp) throws Exception {
        int old_count = 0, new_count = 0, delta = 0;

        /*
         * We've got doc count advice - check the tax and correct any errors.
         * If we detect an incorrect count, adjust ndescdocs of this and
         * all parent nodes by the delta.
         * XXX This is pretty weak - we really need to do verity
         * searches to get the exact numbers for every node, but that's
         * a job for a command line utility. This should keep the plebs happy :)
         *
         * NB: We are called with the tax write lock already held
         */

        SearchLogger.getLogger()
                .log(
                        Level.FINE,
                        "PSSH_CSPSB0079",
                        new Object[] {
                                Integer.toString(cp.getNumDocs()),
                                Integer.toString(qry.doc_count),
                                qry.class_id });

        old_count = cp.getNumDocs();
        new_count = qry.doc_count;
        if ((delta = (new_count - old_count)) != 0) {
            RDMClassification cx = cp;
            cx.setNumDocs(new_count);
            cx.getSOIF().replace(RDM.A_RDM_NCATDOC,
                    "" + cx.getNumDocs());
            do {
                /*
                 * Could just call the tax tree load walker again here,
                 * but this is way more efficient (does it matter?)...
                 */
                int desc_cnt = cx.getNumDescDocs() + delta;
                if (desc_cnt >= 0) { // sanity check
                    cx.setNumDescDocs(desc_cnt);
                    cx.getSOIF().replace(RDM.A_RDM_NSUBDOC,
                            "" + desc_cnt);
                } else {
                    // log - Count went -ve while adjusting classified doc counts
                    SearchLogger.getLogger().log(Level.WARNING,
                            "PSSH_CSPSB0080");
                    break;
                }
            } while ((cx = cx.getParent()) != null);

            // save the new stats
            // XXX this should be saved in a taxonomy db (along with the taxonomy)
            // XXXXXX dbaccess.saveCategorizedCount(null, qry.class_id, new_count);
        }

        // Header for advise-doc-count results
        // XXX not sending any results back for this at the moment
        /*
         * sprintf(msg, "Advise doc count: old count = %d new count = %d for %s",
         *         old_count, new_count, qry.class_id);
         * RDMHeader_SetResponseInterpret(res.header, msg);
         */

        // Log message - written to disk later
        req.logRDM("taxonomy advise - old count=" + old_count
                + " scope=\"" + req.getQuery().getScope() + "\"");
    }

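    /** Traversal callback that writes each in-scope classification's SOIF,
     *  honouring the depth and view-hits limits. */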
    class QLTaxTraverse implements RDMCallback {

        RDMTaxonomy t;              // taxonomy being traversed
        QLTaxQuery qry;             // the parsed taxonomy query
        SOIFOutputStream ss;        // stream receiving the category SOIFs
        int depth_max, depth_start; // absolute depth limit (-1 = unlimited) and depth of the scope node
        int n, max, total;          // results written, view-hits limit (0 = no limit), nodes visited

        public void callback(Object o) throws Exception {
            RDMClassification c = (RDMClassification) o;
            String cid;
            int depth = 0;

            total++;
            // Skip taxonomy root
            /*
             * if (c == t.root)
             *     return;
             */

            // Is this a Classification that we want to use?
            if ((depth_max != -1) && ((depth = t.depth(c)) > depth_max))
                return;

            if (max != 0 && n >= max)
                return;

            // Generate the Classification information that we want
            cid = c.getId();
            SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0081",
                    new Object[] { cid, Integer.toString(depth) });
            if (qry.retrieve_rd) {
                // nothing
                SearchLogger.getLogger().log(Level.FINE,
                        "PSSH_CSPSB0069", cid);
                // XXX not finished
            } else {
                ss.write(c.getSOIF());
            }
            n++;
        }
    }

    /** preload the parsed taxonomy */
    protected void loadTaxonomy(String taxfn) throws Exception {

        Date lmt = new Date(new File(taxfn).lastModified());

        // Skip the reload if the taxonomy file hasn't changed
        // (compare by value - a reference comparison would always force a reload)
        if (lmt.equals(theTax_lmt)) { // nothing changed
            SearchLogger.getLogger().log(Level.FINER, "PSSH_CSPSB0070");
            return;
        }

        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0071");
        SOIFInputStream ss = null;
        try {
            ss = new SOIFInputStream(taxfn);
        } catch (Exception e) {
            SearchLogger.getLogger().log(Level.WARNING,
                    "PSSH_CSPSB0082", taxfn);
            throw e;
        }

        // Nuke the existing preloaded taxonomy - if needed
        //if (theTax)
        //    RDMTaxonomy_Free(theTax);

        SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSB0072",
                taxfn);
        try {
            theTax = new RDMTaxonomy(ss); // Parse taxonomy
        } catch (Exception e) {
            SearchLogger.getLogger().log(Level.WARNING,
                    "PSSH_CSPSB0073", taxfn);
            throw new Exception("Failed to parse taxonomy " + taxfn, e);
        }
        //SOIFStream_Finish(ss);
        //fp.fclose();

        // Load/calculate the classified docs info for each classification
        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0074");
        //XXX s = RDM_Database_ReadCategorized(null);
        SOIF s = new SOIF();
        theTax.apply(RDM.RDM_TAX_POSTORDER, new AddCatContent(s));

        // Add the sub-node and doc/sub-doc info to all of the Class SOIFs
        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0075");
        theTax.apply(RDM.RDM_TAX_INORDER, new UpdateClassSOIF());

        // Set some properties about the preloaded taxonomy
        theTax_lmt = lmt;
        theTax_refresh = new Date(new Date().getTime()
                + theTax_refresh_rate * 1000);
    }

    /********************************************************
     * This callback class runs the dbaccess function
     * RDM_Database_CategorizedCount() for each node in the taxonomy
     * and copies the value (number of docs in this node) into
     * the node member c.ndocs.
     * It also sums the descendant doc count information.
     * Must be called POSTORDER.
     ********************************************************/

    class AddCatContent implements RDMCallback {
        SOIF stats_soif;

        AddCatContent(SOIF s) {
            stats_soif = s;
        }

        public void callback(Object o) throws Exception {
            RDMClassification c = (RDMClassification) o;

            if (c.getDepth() == 0) // skip ROOT XXX ???
                return;

            if (stats_soif == null)
                return; // no stats - counts will all be zero

            //c.setNumDocs(RDM_Database_CategorizedCount(stats_soif, c.getId()));

            // handle descendant doc summing
            c.setNumDescDocs(c.getNumDescDocs() + c.getNumDocs());
            RDMClassification p = c.getParent();
            if (p != null)
                p.setNumDescDocs(p.getNumDescDocs()
                        + c.getNumDescDocs());
        }
    }

    /********************************************************
     * This callback class copies the ndescendant and categorized doc
     * info from the classification structure into the classification
     * soif.
     ********************************************************/

    class UpdateClassSOIF implements RDMCallback {
        public void callback(Object o) throws Exception {
            RDMClassification c = (RDMClassification) o;
            int n = 0;

            if (c.getDepth() == 0) // skip ROOT XXX ???
                return;

            SearchLogger.getLogger().log(
                    Level.FINE,
                    "PSSH_CSPSB0076",
                    new Object[] {
                            Integer.toString(c.getNumDescendant()),
                            Integer.toString(c.getNumDocs()),
                            Integer.toString(c.getNumDescDocs()),
                            c.getId() });

            // update the class soif
            c.getSOIF().replace(RDM.A_RDM_NSUBCAT,
                    "" + c.getNumDescendant());
            c.getSOIF().replace(RDM.A_RDM_NCATDOC, "" + c.getNumDocs());
            c.getSOIF().replace(RDM.A_RDM_NSUBDOC,
                    "" + c.getNumDescDocs());

            if (c.getChildren() != null) {
                RDMClassification child;
                for (n = 0; n < c.nChildren(); ++n) {
                    String id = c.nthChild(n).getId();
                    String category = id
                            .substring(id.lastIndexOf(':') + 1); // should never fail
                    c.getSOIF().replace(RDM.A_RDM_CHILD, category, n);
                }
            }
        }
    }

    /**
     * open -- Opens a Search Database
     *   - rootdir -- db home dir
     *   - dbname -- name of database from root.db (e.g., default)
     *   - rw -- SOIFDb.WRITER or SOIFDb.WRCREAT or SOIFDb.READER
     *   - mode -- Unix mode
     */
    public void open(SToken st, String rootdir, String dbname, int rw,
            int mode) throws RDMException {
        searchengine.open(st, rootdir, dbname, rw, mode);
        refreshTaxonomy(dbname);
    }

    /** Taxonomy reload */
    protected synchronized void refreshTaxonomy(String dbname)
            throws RDMException {
        Date now = new Date();
        if (theTax == null || theTax_refresh.before(now)) {
            // Reload the taxonomy now...
            init(dbname);
            theTax_refresh = new Date(now.getTime()
                    + theTax_refresh_rate * 1000);
        }
    }

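    /**
     * Loads (or reloads) the taxonomy file named in the search configuration
     * and resets the refresh bookkeeping. The dbname argument is currently
     * ignored (see the XXX note in the body).
     */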
    public void init(String dbname) throws RDMException {

        // XXX ignoring tax dbname for now - will honour when tax becomes a real db
        String taxfn = SearchConfig.getValue(SearchConfig.TAX);
        String taxrr = SearchConfig.getValue(SearchConfig.TAX_REFRESH);

        theTax_lmt = new Date(0);
        theTax_refresh = new Date(0);

        // Load the taxonomy from disk into RDMTaxonomy object
        if (taxfn == null) { // Locate taxonomy
            SearchLogger.getLogger().log(Level.WARNING,
                    "PSSH_CSPSB0083", SearchConfig.TAX);
            throw new RDMException("Failed to initialize");
        }

        if (taxrr != null)
            theTax_refresh_rate = Integer.parseInt(taxrr);

        try {
            loadTaxonomy(taxfn);
        } catch (Exception e) {
            SearchLogger.getLogger().log(Level.WARNING,
                    "PSSH_CSPSB0084", taxfn);
            throw new RDMException(e.getMessage()); // XXX
        }

        SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSB0077",
                taxfn);
    }

    /**
     * Closes db and index extents
     * @param st
     * @throws RDMException
     */
    public void close(SToken st) throws RDMException {
        searchengine.close(st);
    }

    /**
     * Query language support
     * @param st
     * @throws RDMException
     * @return Set of supported query languages (String)
     */
    // public Set getSupportedQueryLanguages(SToken st) throws RDMException {
    //     throw new RDMException("not implemented");
    // }

    /**
     * Returns 0 if present, or Db.DB_NOTFOUND or Db.DB_KEYEMPTY (for missing recno key).
     * If result is null, simply checks for presence of key in db.
     * @param st
     * @param key
     * @param result
     * @param flags
     * @param t
     * @throws RDMException
     * @return
     */
    public int fetch(SToken st, Datum key, Datum result, int flags,
            RDMTransaction t) throws RDMException {
        throw new RDMException("not implemented");
    }

    /**
     * retrieve RD from database, filtered by view
     */
    public SOIF fetch(SToken st, String url, Set view, int flags,
            RDMTransaction t) throws RDMException {
        throw new RDMException("not implemented");
    }

    public SOIF fetch(SToken st, String url, int flags, RDMTransaction t)
            throws RDMException {
        throw new RDMException("not implemented");
    }

    /**
     * Recover the db - must be run standalone (i.e., no one else has the db open)
     */
    public void recover(String dbhome, boolean fatal)
            throws RDMException {
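        // No-op for now: the taxonomy is not yet a real db (see the XXX notes
        // above), so there is no taxonomy-specific state to recover here.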
    }

}