0001: /*
0002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
0003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
0004: */
0005:
0006: package com.sun.portal.search.rdmserver;
0007:
0008: import com.sun.portal.search.rdm.*;
0009: import com.sun.portal.search.soif.*;
0010: import com.sun.portal.search.db.*;
0011: import com.sun.portal.search.util.*;
0012: import com.sun.portal.log.common.PortalLogger;
0013:
0014: import com.sun.kt.search.*;
0015:
0016: import java.util.*;
0017: import java.util.logging.Logger;
0018: import java.util.logging.Level;
0019: import java.io.*;
0020:
0021: /**
0022: * Support for RDM Taxonomy requests
0023: *
0024: * <pre>RDM-Query-Language=Taxonomy-Basic
0025: *
 * Scope = descendant Classification | descendant/N Classification
 *       | children Classification | anklebiter Classification
 *       | advise-doc-count/N Classification
0029: *
0030: * where
0031: * Classification = ROOT | Classification-Id
0032: *
0033: * Example RDM messages
0034: * --------------------
0035: *
0036: * To send the entire taxonomy (node and everything below):
0037: *
0038: * @RDMHEADER { -
0039: * RDM-Version{x}: 1.0
0040: * RDM-Type{x}: taxonomy-description-request
0041: * RDM-Query-Language{x}: taxonomy-basic
0042: * }
0043: *
0044: * @RDMQUERY { -
0045: * Scope{x}: descendant ROOT
0046: * }
0047: *
 * To send only the children (i.e., the node and the level directly below it):
0049: *
0050: * @RDMHEADER { -
0051: * RDM-Version{x}: 1.0
0052: * RDM-Type{x}: taxonomy-description-request
0053: * RDM-Query-Language{x}: taxonomy-basic
0054: * }
0055: *
0056: * @RDMQUERY { -
0057: * Scope{x}: anklebiter ROOT
0058: * }
0059: *
0060: * OR
0061: *
0062: * @RDMHEADER { -
0063: * RDM-Version{x}: 1.0
0064: * RDM-Type{x}: taxonomy-description-request
0065: * RDM-Query-Language{x}: taxonomy-basic
0066: * }
0067: *
0068: * @RDMQUERY { -
0069: * Scope{x}: anklebiter Education/Training:Internal Training Classes
0070: * }
0071: *
0072: * Added in compass 302...
0073: *
 * To advise of classified doc counts, e.g., when the db stats don't match Verity:
0075: *
0076: * @RDMHEADER { -
0077: * RDM-Version{x}: 1.0
0078: * RDM-Type{x}: taxonomy-description-request
0079: * RDM-Query-Language{x}: taxonomy-basic
0080: * }
0081: *
0082: * @RDMQUERY { -
0083: * Scope{x}: advise-doc-count/N Internal Engineering
0084: * }
0085: *
0086: */
0087: public class TaxonomyService extends RDMService {
0088:
// Classification statistics support.
// CLASS_KEY identifies the stored statistics record; the remaining constants are
// attribute names inside that record: CLASS_NAME holds classification names,
// CLASS_THIS the matching per-classification document counts and CLASS_NODES
// the number of entries. CLASS_IDXBATCH is not referenced in this class.
public static final String CLASS_KEY = "statistics:classification";
public static final String CLASS_NAME = "s_class";
public static final String CLASS_NODES = "s_nodes";
public static final String CLASS_THIS = "s_nodecnt";
public static final String CLASS_IDXBATCH = "s_idxbatch";

// The parsed taxonomy currently being served (per instance); null until loaded.
RDMTaxonomy theTax = null;
// Last-modified time of the taxonomy file as recorded by the last load (shared).
static Date theTax_lmt = null;
// Point in time after which the next request triggers a reload (shared).
static Date theTax_refresh = null;
// Reload interval in seconds (it is multiplied by 1000 when added to a millisecond time).
static int theTax_refresh_rate = (60 * 1); // default: 1 minute

// Supported query languages
public static final String QL_TAX_BASIC = "taxonomy-basic";
public static final String QL_SEARCH = "search";
public static final String QL_CLASSIFICATION = "classification";

// Legacy query-language names, all handled as aliases for QL_SEARCH
public static final String QL_LEGACY_COMPASS = "compass";
public static final String QL_LEGACY_NSIR = "nsir";
public static final String QL_LEGACY_VERITY = "verity";
public static final String QL_LEGACY_VERITY_QL = "verity-ql";

// Database accessor, obtained from DbAccess.getInstance() in the constructor.
DbAccess dbaccess;
0113:
/**
 * Parsed form of a taxonomy-basic Scope string.
 * Instances are filled in by taxBasicScopeParse; the initialisers below only
 * spell out the language defaults.
 */
class QLTaxQuery {
    /** Classification id named in the scope (the text after the directive). */
    String class_id = null;

    /** Number of levels to descend below the start node; -1 means unlimited. */
    int depth = 0;

    /** When true, resource descriptions rather than classifications are wanted (not finished). */
    boolean retrieve_rd = false;

    /** True for an advise-doc-count request. */
    boolean advise_doc_count = false;

    /** True when the start node itself is to be left out of the results ("children*"). */
    boolean exclude_first_node = false;

    /** Document count supplied with an advise-doc-count request. */
    int doc_count = 0;
}
0123:
/** Search processing support: holds the scope of a search-language request. */
class QLSearchQuery {
    /** The query scope exactly as received; assigned by searchScopeParse. */
    String scope = null;
}
0128:
0129: public TaxonomyService() {
0130: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_TAX_REQ,
0131: null));
0132: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_TAX_REQ,
0133: QL_TAX_BASIC));
0134: //supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ, QL_TAX_BASIC)); // not supported
0135: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_TAX_REQ,
0136: QL_SEARCH));
0137:
0138: // aliases
0139: //supportedServices.add(new RDMServiceDescriptor(RDM.RDM_RD_REQ, QL_CLASSIFICATION));
0140: // alias for tax basic, but not supported (use a db search instead)
0141:
0142: // legacy
0143: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_TAX_REQ,
0144: QL_LEGACY_COMPASS)); // alias for search
0145: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_TAX_REQ,
0146: QL_LEGACY_NSIR)); // alias for search
0147: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_TAX_REQ,
0148: QL_LEGACY_VERITY)); // alias for search
0149: supportedServices.add(new RDMServiceDescriptor(RDM.RDM_TAX_REQ,
0150: QL_LEGACY_VERITY_QL)); // alias for search
0151:
0152: dbaccess = DbAccess.getInstance();
0153: }
0154:
0155: public void init(String dbname) throws Exception {
0156:
0157: // XXX ignoring tax dbname for now - will honour when tax becomes a real db
0158: // All of this code shouldbe moved to a tax db class
0159: String taxfn = SearchConfig.getValue(SearchConfig.TAX);
0160: String taxrr = SearchConfig.getValue(SearchConfig.TAX_REFRESH);
0161:
0162: theTax_lmt = new Date(0);
0163: theTax_refresh = new Date(0);
0164:
0165: // Load the taxonomy from disk into RDMTaxonomy object
0166: if (taxfn == null) { // Locate taxonomy
0167: SearchLogger.getLogger().log(Level.WARNING,
0168: "PSSH_CSPSRDMS0090", SearchConfig.TAX);
0169: throw new Exception("Failed to initialize");
0170: }
0171:
0172: if (taxrr != null)
0173: theTax_refresh_rate = Integer.parseInt(taxrr);
0174:
0175: try {
0176: loadTaxonomy(taxfn);
0177: } catch (Exception e) {
0178: SearchLogger.getLogger().log(Level.WARNING,
0179: "PSSH_CSPSRDMS0091", taxfn);
0180: throw e;
0181: }
0182:
0183: // Log - Loaded Taxonomy: {0}.
0184: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0092",
0185: taxfn);
0186: }
0187:
0188: /**
0189: * RDM-QL=Taxonomy-Basic Service Function
0190: * Takes the incoming RDM Request message, and returns the RDMResponse.
0191: */
0192: public void service(RDMRequest req, RDMResponse res)
0193: throws Exception {
0194:
0195: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0093");
0196:
0197: normalizeRequest(req);
0198: req.setSToken(new SToken(null, false, false, false)); // don't check anything
0199:
0200: // Trigger protected taxonomy reload
0201: refreshTaxonomy(req.getQuery().getDatabase());
0202:
0203: // Set outgoing response @RDMHEADER
0204: res.getHeader().setType(RDM.RDM_TAX_RES);
0205:
0206: // Dummy result in case of error, for clients that don't handle errors cleanly
0207: res.getHeader().setResponseInterpret(
0208: "0 results out of 0 hits across 0 documents");
0209:
0210: if (theTax != null) {
0211: String tax_id = theTax.getId();
0212: res.getHeader().getSOIF().replace(RDM.A_RDM_TAX, tax_id);
0213: }
0214:
0215: // Do we actually have a query?
0216: if (req.getQuery() == null) {
0217: res.getHeader().setErrorMessage("Bad or missing query");
0218: return;
0219: }
0220:
0221: // Which query language?
0222: String ql = req.getHeader().getQueryLanguage();
0223:
0224: if (ql.equalsIgnoreCase(QL_SEARCH))
0225: ql_search_service(req, res);
0226: else if (ql.equalsIgnoreCase(QL_TAX_BASIC)
0227: || ql.equalsIgnoreCase(QL_CLASSIFICATION))
0228: ql_tax_basic_service(req, res);
0229:
0230: // Try legacy QLs
0231: else if (ql.equalsIgnoreCase(QL_LEGACY_NSIR)
0232: || ql.equalsIgnoreCase(QL_LEGACY_COMPASS)
0233: || ql.equalsIgnoreCase(QL_LEGACY_VERITY)
0234: || ql.equalsIgnoreCase(QL_LEGACY_VERITY_QL)) {
0235: req.getHeader().setQueryLanguage(QL_SEARCH);
0236: ql_search_service(req, res);
0237: }
0238:
0239: }
0240:
0241: /**
0242: * RDM-QL=Taxonomy-Basic Service Function
0243: * Takes the incoming RDM Request message, and returns the RDMResponse.
0244: */
0245: public void ql_tax_basic_service(RDMRequest req, RDMResponse res)
0246: throws Exception {
0247:
0248: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0093");
0249:
0250: // Validate Scope specification
0251: QLTaxQuery qry = new QLTaxQuery();
0252: String scope = req.getQuery().getScope();
0253: try {
0254: taxBasicScopeParse(scope, qry);
0255: } catch (Exception e) {
0256: SearchLogger.getLogger().log(Level.WARNING,
0257: "PSSH_CSPSRDMS0094", scope);
0258: res.getHeader().setErrorMessage("Invalid Scope");
0259: res.getHeader().setResponseInterpret(
0260: "0 results out of 0 hits across 0 documents");
0261: return;
0262: }
0263: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0095",
0264: scope);
0265:
0266: /** XXX
0267: * Set taxonomy id for tax search results to the default tax id.
0268: * XXX There's currently no way to search a different tax.
0269: * If there ever is, this will need to change.
0270: * XXX For tax requests only? (currently only used in browse
0271: * templates, eg, normal)
0272: * XXX Need RDMHeader_SetTaxonomyId()
0273: */
0274: /*if (theTax) {
0275: String tax_id = RDMTaxonomy_GetId(theTax);
0276: SOIF_Replace(res.header.soif, A_RDM.RDM_TAX, tax_id, strlen(tax_id));
0277: }
0278: */
0279:
0280: // get the results...
0281: getTaxBasicResults(req, res, qry);
0282: }
0283:
0284: /**
0285: * RDM-QL=Taxonomy-Basic Service Function
0286: * Takes the incoming RDM Request message, and returns the RDMResponse.
0287: */
0288: public void ql_search_service(RDMRequest req, RDMResponse res)
0289: throws Exception {
0290:
0291: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0096");
0292:
0293: // Validate Scope specification
0294: QLSearchQuery qry = new QLSearchQuery();
0295: String scope = req.getQuery().getScope();
0296: try {
0297: searchScopeParse(scope, qry);
0298: } catch (Exception e) {
0299: SearchLogger.getLogger().log(Level.WARNING,
0300: "PSSH_CSPSRDMS0097", scope);
0301: res.getHeader().setErrorMessage("Invalid Scope");
0302: res.getHeader().setResponseInterpret(
0303: "0 results out of 0 hits across 0 documents");
0304: return;
0305: }
0306: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0098",
0307: scope);
0308:
0309: // get the results...
0310: getSearchResults(req, res, qry);
0311: }
0312:
0313: protected boolean searchScopeParse(String scope, QLSearchQuery qry) {
0314: if (scope != null) {
0315: qry.scope = scope;
0316: return true;
0317: }
0318: return false;
0319: }
0320:
0321: protected void getSearchResults(RDMRequest req, RDMResponse res,
0322: QLSearchQuery qry) throws Exception {
0323:
0324: // XXX A lot of this is duplicated in DatabaseService
0325:
0326: RDMView view = new RDMView(req);
0327: RDMDb db = null;
0328:
0329: long nDocs = 0;
0330: long nHits = 0;
0331: int nResults = 0;
0332:
0333: SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSRDMS0099",
0334: qry.scope);
0335:
0336: SToken st = req.getSToken();
0337: RDMTransaction t = req.getTransaction();
0338:
0339: try {
0340:
0341: // Get a database connection
0342: try {
0343: db = dbaccess.readStart(st, req.getQuery()
0344: .getDatabase());
0345: } catch (Exception e) {
0346: SearchLogger.getLogger().log(Level.WARNING,
0347: "PSSH_CSPSRDMS0100", e);
0348: throw new Exception("Cannot access database.");
0349: }
0350:
0351: int flags = 0;
0352:
0353: String search_scope = qry.scope; // newScope is set if the query was xlated
0354: if (search_scope == null)
0355: search_scope = qry.scope;
0356:
0357: RDMResultSet rs = db.search(st, search_scope,
0358: view.hits.max, view.attr, view.order, t);
0359:
0360: nHits = rs.getHitCount();
0361: int intHits = (int) Math.min(nHits, Integer.MAX_VALUE);
0362: int minHit = view.hits.min == 0 ? 0 : Math.min(
0363: view.hits.min - 1, intHits);
0364: int maxHit = Math.min(view.hits.max, intHits);
0365: int nReturned = maxHit - minHit;
0366:
0367: // send out the results
0368: nDocs = db.count(st, t);
0369: res.getHeader().setResponseInterpret(
0370: nReturned + " results out of " + nHits
0371: + " hits across " + nDocs + " documents");
0372: res.sendHeader();
0373:
0374: // XXX SOIF hits could come straight from db, but this let's us stream
0375: // the output - need to stream from RDMDb
0376:
0377: // Fetch classification RD by URL
0378: for (int i = minHit; i < maxHit; ++i) {
0379: // XXX move this to taxdb
0380: Result hit = ((NovaResultSet) rs).getHit(i);
0381: String url = (String) hit.getField("id");
0382: if (url == null) {
0383: SearchLogger.getLogger().log(Level.WARNING,
0384: "PSSH_CSPSRDMS0101");
0385: continue;
0386: }
0387:
0388: // Find the interesting node
0389: RDMClassification cp;
0390: SOIF s;
0391: if ((cp = theTax.find(url)) == null) {
0392: SearchLogger.getLogger().log(Level.WARNING,
0393: "PSSH_CSPSRDMS0102", url);
0394: // Create a minimal result, no custom attrs available...
0395: s = new SOIF(RDM.A_SN_RDM_CLASS, "-");
0396: s.replace(RDM.A_RDM_ID, url);
0397: } else {
0398: SOIF cat = cp.getSOIF();
0399: // We need to shallow 'clone' the shared cat soif here, so as to safely add the score to it
0400: s = new SOIF(cat.getSchemaName(), cat.getURL());
0401: s.merge(cat);
0402: }
0403: s.replace("score", ""
0404: + Math.round(hit.getScore() * 100.0f));
0405: res.getOutputStream().write(s, view.attr);
0406: ++nResults;
0407: }
0408:
0409: } finally {
0410: if (db != null)
0411: dbaccess.readFinish(st, db);
0412: }
0413:
0414: // Log message - written to disk later
0415: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0103",
0416: new Integer(nResults));
0417: req.logRDM("db=" + req.getQuery().getDatabase() + " hits="
0418: + nResults + "/" + nHits + "/" + nDocs + " scope=\""
0419: + Encoder.quotedEscape(qry.scope) + "\"");
0420:
0421: }
0422:
0423: public void normalizeRequest(RDMRequest req) {
0424:
0425: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0104");
0426:
0427: RDMHeader hdr = req.getHeader();
0428: RDMQuery qry = req.getQuery();
0429:
0430: String rdmtype = hdr.getType();
0431: String ql, scope;
0432:
0433: if (rdmtype.equalsIgnoreCase(RDM.RDM_TAX_REQ)) {
0434:
0435: // Set taxonomy database name if needed
0436: if (qry.getDatabase() == null)
0437: qry.setDatabase(SearchConfig
0438: .getValue(SearchConfig.TAX_DBNAME));
0439:
0440: // Set Default Query Language if needed
0441: ql = hdr.getQueryLanguage();
0442: if (ql == null || ql.length() == 0)
0443: hdr.setQueryLanguage(QL_TAX_BASIC);
0444:
0445: // Set Default Scope if needed
0446: if (req.getQuery() == null)
0447: req.setQuery(new RDMQuery((String) null));
0448: scope = req.getQuery().getScope();
0449: if (scope == null || scope.length() == 0)
0450: req.getQuery().setScope("descendant ROOT");
0451: }
0452:
0453: }
0454:
0455: protected void taxBasicScopeParse(String scope, QLTaxQuery qry)
0456: throws Exception {
0457:
0458: int p = 0;
0459: int n = 0;
0460: int count = 0;
0461: char ch;
0462:
0463: qry.depth = -1; // default - unlimited
0464: qry.class_id = null;
0465: qry.retrieve_rd = false;
0466: qry.advise_doc_count = false;
0467: qry.doc_count = 0;
0468:
0469: // Syntax: directive[/ddd] class
0470:
0471: if (scope.regionMatches(true, 0, "anklebiter", 0, 10)) {
0472:
0473: // XXX is this slow? p += "anklebiter".length;
0474: p += 10;
0475: qry.depth = 1;
0476: SearchLogger.getLogger().log(Level.FINER,
0477: "PSSH_CSPSRDMS0105");
0478:
0479: } else if (scope.regionMatches(true, 0, "children", 0, 8)) {
0480:
0481: // children is synonymous with anklebiter
0482: p += 8;
0483: qry.depth = 1;
0484: if (scope.charAt(p) == '*') {
0485: ++p;
0486: qry.exclude_first_node = true;
0487: }
0488: SearchLogger.getLogger().log(Level.FINER,
0489: "PSSH_CSPSRDMS0106");
0490:
0491: } else if (scope.regionMatches(true, 0, "descendant", 0, 10)) {
0492:
0493: // Retrieve depth if specified from descendant/ddd
0494: p += 10;
0495: if (scope.charAt(p) == '/') {
0496: for (++p; '0' <= (ch = scope.charAt(p)) && ch <= '9'; ++p)
0497: count = count * 10 + ch - '0';
0498: qry.depth = count;
0499: } else
0500: SearchLogger.getLogger().log(Level.FINER,
0501: "PSSH_CSPSRDMS0107", new Integer(qry.depth));
0502:
0503: } else if (scope.regionMatches(true, 0, "advise-doc-count", 0,
0504: 16)) {
0505:
0506: // Retrieve doc count from advise-doc-count/ddd
0507: p += 16;
0508: if (scope.charAt(p) == '/') {
0509: for (++p; '0' <= (ch = scope.charAt(p)) && ch <= '9'; ++p)
0510: count = count * 10 + ch - '0';
0511: qry.doc_count = count;
0512: } else
0513: throw new Exception("invalid scope");
0514: qry.advise_doc_count = true;
0515: SearchLogger.getLogger().log(Level.FINER,
0516: "PSSH_CSPSRDMS0108", new Integer(qry.doc_count));
0517:
0518: } else
0519: throw new Exception("invalid scope");
0520:
0521: if (scope.charAt(p) != ' ')
0522: throw new Exception("invalid scope");
0523: while (scope.charAt(p) == ' ')
0524: p++; // skip space
0525:
0526: qry.class_id = scope.substring(p).trim();
0527: }
0528:
0529: /** Taxonomy reload */
0530: // XXX synch on TAXLOCK?
0531: protected synchronized void refreshTaxonomy(String dbname)
0532: throws Exception {
0533: Date now = new Date();
0534: if (theTax == null || theTax_refresh.before(now)) {
0535: // Reload the taxonomy now...
0536: init(dbname);
0537: theTax_refresh = new Date(now.getTime()
0538: + theTax_refresh_rate * 1000);
0539: }
0540: }
0541:
0542: protected void getTaxBasicResults(RDMRequest req, RDMResponse res,
0543: QLTaxQuery qry) throws Exception {
0544:
0545: SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSRDMS0109",
0546: req.getQuery().getScope());
0547:
0548: RDMTaxonomy tp = theTax; // use already-parsed Taxonomy
0549:
0550: // Find the interesting node
0551: RDMClassification cp = null;
0552: if ((cp = tp.find(qry.class_id)) == null) {
0553: res.getHeader().setResponseInterpret(
0554: "0 results out of 0 hits across 0 documents");
0555: return;
0556: }
0557:
0558: if (qry.advise_doc_count) {
0559: /**
0560: * Handle classified document count advice
0561: * NB: This processes and logs the request and sets the result hdr
0562: */
0563: handleBrowseAdvice(req, res, qry, cp);
0564: return;
0565: }
0566:
0567: // XXX This is where we would return either RDs or Categories depending
0568: // on the requested rdm type, but we only support cats at the moment.
0569:
0570: // Taxonomy description request handling
0571: RDMView view = new RDMView(req);
0572: // XXX special case for taxonomy bwd compat
0573: // - if view-hits not given, default to all (instead of 10, etc)
0574: if (req.getQuery().getViewHits() == null)
0575: view.hits.max = RDMViewHits.RDMVIEWHITS_MAX;
0576:
0577: // XXX Temporary results buffer so we can count the results before sending them (XXX - not for RDs)
0578: SOIFBuffer sb = new SOIFBuffer();
0579: SOIFOutputStream ss = new SOIFOutputStream(sb);
0580:
0581: // Print leading Taxonomy information
0582: if (!qry.retrieve_rd)
0583: ss.write(tp.getSOIF());
0584:
0585: // Set View-Attributes if available
0586: if (view != null && view.attr != null) {
0587: SearchLogger.getLogger().log(Level.FINE,
0588: "PSSH_CSPSRDMS0110", view.attr);
0589: ss.setAllowed(view.attr);
0590: }
0591:
0592: // Traverse the Taxonomy and save the results
0593: QLTaxTraverse tt = new QLTaxTraverse();
0594: tt.t = tp;
0595: tt.qry = qry;
0596: tt.results = 0;
0597: tt.hits = 0;
0598: tt.total = 0;
0599: tt.min = view.hits.min;
0600: tt.max = view.hits.max;
0601: tt.depth_start = tp.depth(cp);
0602: if (qry.depth == -1)
0603: tt.depth_max = -1; // unlimited
0604: else
0605: tt.depth_max = qry.depth + tt.depth_start;
0606: tt.ss = ss;
0607: SearchLogger.getLogger().log(
0608: Level.FINER,
0609: "PSSH_CSPSRDMS0111",
0610: new Object[] { new Integer(tt.depth_start),
0611: new Integer(tt.depth_max) });
0612: cp.apply(RDM.RDM_TAX_INORDER, tt);
0613:
0614: // Header for taxonomy description results
0615: res.getHeader().setResponseInterpret(
0616: tt.results + " results out of " + tt.hits
0617: + " hits across " + tt.total + " documents");
0618:
0619: // Log message - written to disk later
0620: req.logRDM("xfer=" + tt.results + " scope=\""
0621: + Encoder.quotedEscape(req.getQuery().getScope())
0622: + "\"");
0623:
0624: // send out the results
0625: res.sendHeader();
0626: res.getOutputStream().write(sb.toByteArray());
0627:
0628: }
0629:
0630: /**
0631: * Handles advisory corrections to the classified document count stats.
0632: * (the stats are kept in a rather unreliable way)
0633: */
0634: protected void handleBrowseAdvice(RDMRequest req, RDMResponse res,
0635: QLTaxQuery qry, RDMClassification cp) throws Exception {
0636:
0637: int old_count = 0, new_count = 0, delta = 0;
0638:
0639: /**
0640: * We've got doc count advice - check the tax and correct any errors.
0641: * If we detect an incorrect count, adjust ndescdocs of this and
0642: * all parent nodes by the delta.
0643: * XXX This is pretty weak - we really need to do verity
0644: * searches to get the exact numbers for every node, but that's
0645: * a job for a command line utility. This should keep the plebs happy :)
0646: * XXX To handle different counts for different access roles, this will
0647: * have to include rols info in the class counts. Or, we can admit
0648: * defeat and toss out this feature...
0649: */
0650:
0651: SearchLogger.getLogger().log(
0652: Level.FINE,
0653: "PSSH_CSPSRDMS0112",
0654: new Object[] { new Integer(cp.getNumDocs()),
0655: new Integer(qry.doc_count), qry.class_id });
0656:
0657: old_count = cp.getNumDocs();
0658: new_count = qry.doc_count;
0659: if ((delta = (new_count - old_count)) != 0) {
0660: RDMClassification cx = cp;
0661: cx.setNumDocs(new_count);
0662: cx.getSOIF().replace(RDM.A_RDM_NCATDOC,
0663: "" + cx.getNumDocs());
0664: do {
0665: /** Could just call the tax tree load walker again here,
0666: * but this is way more efficient (does it matter?)...
0667: */
0668: int desc_cnt = cx.getNumDescDocs() + delta;
0669: if (desc_cnt >= 0) { // sanity check
0670: cx.setNumDescDocs(desc_cnt);
0671: cx.getSOIF().replace(RDM.A_RDM_NSUBDOC,
0672: "" + desc_cnt);
0673: } else {
0674: SearchLogger.getLogger().log(Level.WARNING,
0675: "PSSH_CSPSRDMS0113");
0676: break;
0677: }
0678: } while ((cx = cx.getParent()) != null);
0679:
0680: // save the new stats
0681: // XXX this should be saved in a taxonomy db (along with the taxonomy)
0682: saveCategorizedCount(req.getSToken(), SearchConfig
0683: .getValue(SearchConfig.DBNAME), qry.class_id,
0684: new_count, req.getTransaction());
0685: }
0686:
0687: // Header for advise-doc-count results
0688: // XXX not sending any results back for this at the moment
0689: /**
0690: * sprintf(msg, "Advise doc count: old count = %d new count = %d for %s",
0691: * old_count, new_count, qry.class_id);
0692: * RDMHeader_SetResponseInterpret(res.header, msg);
0693: */
0694:
0695: // Log message - written to disk later
0696: req.logRDM("taxonomy advise - old count=" + old_count
0697: + " scope=\""
0698: + Encoder.quotedEscape(req.getQuery().getScope())
0699: + "\"");
0700: }
0701:
0702: /** Read classification browse counts */
0703: public SOIF readCategorized(SToken st, String dbname,
0704: RDMTransaction t) throws Exception {
0705: // XXX this will disappear when tax is a real db
0706: SOIF s = null;
0707: RDMDb db = null;
0708: try {
0709: db = dbaccess.readStart(st, dbname);
0710: s = db.fetch(st, CLASS_KEY, null, 0, t);
0711: } catch (Exception e) {
0712: SearchLogger.getLogger().log(Level.WARNING,
0713: "PSSH_CSPSRDMS0114", e);
0714: return null;
0715: //throw e;
0716: } finally {
0717: if (db != null)
0718: dbaccess.readFinish(st, db);
0719: }
0720: return s;
0721: }
0722:
0723: /** Read classification browse counts */
0724: public void writeCategorized(SToken st, String dbname, SOIF s,
0725: RDMTransaction t) throws Exception {
0726: // XXX this will disappear when tax is a real db
0727: RDMDb db = null;
0728: try {
0729: db = dbaccess.writeStart(st, dbname);
0730: db.store(st, s, null, SOIFDb.NOSTATS, t);
0731: SearchLogger.getLogger().log(Level.FINE,
0732: "PSSH_CSPSRDMS0115");
0733: } catch (Exception e) {
0734: SearchLogger.getLogger().log(Level.WARNING,
0735: "PSSH_CSPSRDMS0116", e);
0736: throw e;
0737: } finally {
0738: if (db != null)
0739: dbaccess.writeFinish(st, db);
0740: }
0741: }
0742:
0743: static public int getCategorizedCount(SOIF s, String category) {
0744: int i;
0745: AVPair avp = s.getAVPair(CLASS_NAME);
0746: AVPair node = s.getAVPair(CLASS_THIS);
0747: String p;
0748: for (i = 0; i <= avp.getMaxIndex(); i++) {
0749: if (!avp.nthValid(i) || !node.nthValid(i))
0750: continue;
0751: p = avp.getValue(i);
0752: if (p.equals(category)) {
0753: if ((p = node.getValue(i)) != null) {
0754: SearchLogger.getLogger().log(Level.FINE,
0755: "PSSH_CSPSRDMS0117",
0756: new Object[] { CLASS_THIS, category, p });
0757: return Integer.parseInt(p);
0758: }
0759: break;
0760: }
0761: }
0762: return 0;
0763: }
0764:
0765: // XXX This should synchronize just with refreshTaxonomy or theTax
0766: public synchronized void saveCategorizedCount(SToken st,
0767: String dbname, String category, int count, RDMTransaction t)
0768: throws Exception {
0769:
0770: int i = 0;
0771: String p;
0772:
0773: SOIF s = readCategorized(st, dbname, t);
0774: if (s == null)
0775: s = new SOIF(CLASS_KEY, CLASS_KEY);
0776:
0777: AVPair avp = s.getAVPair(CLASS_NAME);
0778:
0779: if (avp != null) {
0780: for (i = 0; i <= avp.getMaxIndex(); i++) {
0781: if (!avp.nthValid(i))
0782: continue;
0783: if ((p = avp.getValue(i)) != null && p.equals(category)) {
0784: // found the class, now change the count
0785: if (count > 0)
0786: s.replace(CLASS_THIS, String.valueOf(count), i);
0787: else {
0788: // 0 count - remove this stats entry altogether
0789: s.remove(CLASS_NAME, i);
0790: s.remove(CLASS_THIS, i);
0791: avp.squeeze();
0792: s.replace(CLASS_NODES, String.valueOf(avp
0793: .getMaxIndex() + 1));
0794: }
0795: SearchLogger.getLogger().log(Level.FINE,
0796: "PSSH_CSPSRDMS0118");
0797: writeCategorized(st, dbname, s, t);
0798: return;
0799: }
0800: }
0801: }
0802:
0803: if (count == 0) {
0804: // no existing entry and we don't want to save zero counts
0805: return;
0806: }
0807:
0808: if (avp == null || i > avp.getMaxIndex()) {
0809: // class wasn't found - add it now and fix up s_nodes count
0810: String buf = String.valueOf(count);
0811: if (!s.insert(CLASS_NAME, category, i)
0812: || !s.insert(CLASS_THIS, buf, i)) {
0813: SearchLogger.getLogger().log(Level.WARNING,
0814: "PSSH_CSPSRDMS0119");
0815: return;
0816: }
0817: buf = String.valueOf(avp != null ? avp.valueCount() : 1);
0818: s.replace(CLASS_NODES, buf);
0819: }
0820:
0821: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0120");
0822: writeCategorized(st, dbname, s, t);
0823: return;
0824: }
0825:
0826: class QLTaxTraverse implements RDMCallback {
0827:
0828: RDMTaxonomy t;
0829: QLTaxQuery qry;
0830: SOIFOutputStream ss;
0831: int depth_max, depth_start;
0832: int results, hits, total, min, max;
0833: boolean seen_first_node = false;
0834:
0835: public void callback(Object o) throws Exception {
0836: RDMClassification c = (RDMClassification) o;
0837: String cid;
0838: int depth = 0;
0839:
0840: total++;
0841:
0842: // exclude top node?
0843: if (!seen_first_node) {
0844: seen_first_node = true;
0845: if (qry.exclude_first_node)
0846: return;
0847: }
0848:
0849: // Skip taxonomy root
0850: /**
0851: * if (c == t.root)
0852: * return;
0853: */
0854:
0855: // Is this a Classification that we want to use?
0856: if ((depth_max != -1) && ((depth = t.depth(c)) > depth_max))
0857: return;
0858:
0859: hits++;
0860:
0861: // Generate the Classification information that we want
0862: cid = c.getId();
0863: SearchLogger.getLogger().log(Level.FINE,
0864: "PSSH_CSPSRDMS0121",
0865: new Object[] { cid, new Integer(depth) });
0866: if (qry.retrieve_rd) {
0867: // nothing
0868: SearchLogger.getLogger().log(Level.FINE,
0869: "PSSH_CSPSRDMS0122", new Object[] { cid });
0870: // XXX not finished
0871: } else {
0872: if (hits >= min && (max == 0 || hits <= max)) {
0873: ss.write(c.getSOIF());
0874: results++;
0875: }
0876: }
0877: }
0878: }
0879:
0880: /** preload the parsed taxonomy */
0881: protected void loadTaxonomy(String taxfn) throws Exception {
0882:
0883: Date lmt = new Date(new File(taxfn).lastModified());
0884:
0885: // Shortcut reload if needed
0886: if (theTax_lmt == lmt) { // nothing changed
0887: SearchLogger.getLogger().log(Level.FINER,
0888: "PSSH_CSPSRDMS0123");
0889: return;
0890: }
0891:
0892: SearchLogger.getLogger().log(Level.INFO, "PSSH_CSPSRDMS0124",
0893: taxfn);
0894: SOIFInputStream ss = null;
0895: try {
0896: ss = new SOIFInputStream(taxfn);
0897: } catch (Exception e) {
0898: SearchLogger.getLogger().log(Level.WARNING,
0899: "PSSH_CSPSRDMS0125", taxfn);
0900: throw e;
0901: }
0902:
0903: SearchLogger.getLogger().log(Level.FINEST, "PSSH_CSPSRDMS0126",
0904: taxfn);
0905: try {
0906: theTax = new RDMTaxonomy(ss); // Parse taxonomy
0907: } catch (Exception e) {
0908: SearchLogger.getLogger().log(Level.WARNING,
0909: "PSSH_CSPSRDMS0127", taxfn);
0910: throw new Exception("Failed to parse taxonomy " + taxfn);
0911: }
0912:
0913: // Load/calculate the classified docs info for each classification
0914: //SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0128");
0915: //SOIF s = readCategorized(
0916: // new SToken(null, false, false) /*no checking*/,
0917: // SearchConfig.getValue(SearchConfig.DBNAME),
0918: // null /*txn*/);
0919: //theTax.apply(RDM.RDM_TAX_POSTORDER, new AddCatContent(s));
0920:
0921: // Add the sub-node and doc/sub-doc info to all of the Class SOIFs
0922: SearchLogger.getLogger().log(Level.FINE, "PSSH_CSPSRDMS0129");
0923: theTax.apply(RDM.RDM_TAX_INORDER, new UpdateClassSOIF());
0924:
0925: // Set some properties about the preloaded taxonomy
0926: theTax_lmt = lmt;
0927: theTax_refresh = new Date(new Date().getTime()
0928: + theTax_refresh_rate * 1000);
0929: }
0930:
0931: /********************************************************
0932: * This callback class runs the dbaccess function
0933: * RDM_Database_CategorizedCount() for each node in the taxonomy
0934: * and copies the value (number of docs in this node) into
0935: * the node member c.ndocs.
0936: * It also sums the descendant doc count information.
0937: * Must be called POSTORDER.
0938: ********************************************************/
0939:
0940: class AddCatContent implements RDMCallback {
0941: SOIF stats_soif;
0942:
0943: AddCatContent(SOIF s) {
0944: stats_soif = s;
0945: }
0946:
0947: public void callback(Object o) throws Exception {
0948: RDMClassification c = (RDMClassification) o;
0949:
0950: if (c.getDepth() == 0) // skip ROOT XXX ???
0951: return;
0952:
0953: if (stats_soif == null)
0954: return; // no stats - counts will all be zero
0955:
0956: //c.setNumDocs(RDM_Database_CategorizedCount(stats_soif, c.getId()));
0957:
0958: // handle descendant doc summing
0959: c.setNumDescDocs(c.getNumDescDocs() + c.getNumDocs());
0960: RDMClassification p = c.getParent();
0961: if (p != null)
0962: p.setNumDescDocs(p.getNumDescDocs()
0963: + c.getNumDescDocs());
0964: }
0965: }
0966:
0967: /********************************************************
0968: * This callback class copies the ndescendant and categorized doc
0969: * info from the classification structure into the classification
0970: * soif.
0971: ********************************************************/
0972:
0973: class UpdateClassSOIF implements RDMCallback {
0974: public void callback(Object o) throws Exception {
0975: RDMClassification c = (RDMClassification) o;
0976: int n = 0;
0977:
0978: if (c.getDepth() == 0) // skip ROOT XXX ???
0979: return;
0980:
0981: SearchLogger.getLogger()
0982: .log(
0983: Level.FINE,
0984: "PSSH_CSPSRDMS0130",
0985: new Object[] {
0986: new Integer(c.getNumDescendant()),
0987: new Integer(c.getNumDocs()),
0988: new Integer(c.getNumDescDocs()),
0989: c.getId() });
0990:
0991: // update the class soif
0992: c.getSOIF().replace(RDM.A_RDM_NSUBCAT,
0993: "" + c.getNumDescendant());
0994: c.getSOIF().replace(RDM.A_RDM_NCATDOC, "" + c.getNumDocs());
0995: c.getSOIF().replace(RDM.A_RDM_NSUBDOC,
0996: "" + c.getNumDescDocs());
0997:
0998: if (c.getChildren() != null) {
0999: RDMClassification child;
1000: for (n = 0; n < c.nChildren(); ++n) {
1001: String id = c.nthChild(n).getId();
1002: String category = id
1003: .substring(id.lastIndexOf(':') + 1); // should never fail
1004: c.getSOIF().replace(RDM.A_RDM_CHILD, category, n);
1005: }
1006: }
1007: }
1008: }
1009:
1010: }
|