001: /*
002: * RDMResultSet.java
003: *
004: * Created on August 30, 2002, 8:36 PM
005: */
006:
007: package com.sun.portal.search.db;
008:
009: import com.sun.portal.search.rdm.*;
010: import com.sun.portal.search.soif.*;
011: import com.sun.portal.search.util.Encoder;
012: import com.sun.portal.search.util.*; // XXX logging here???
013: import com.sun.portal.log.common.PortalLogger;
014:
015: import com.sun.kt.search.*;
016:
017: import java.util.*;
018: import java.util.logging.Logger;
019: import java.util.logging.Level;
020: import java.text.*;
021:
022: public class NovaResultSet extends RDMResultSet {
023:
/**
 * Creates a new instance of NovaResultSet.
 *
 * <p>Up to {@code numHits} results are read from {@code rs}. If {@code rs}
 * is null, or reading from it fails, the set is left empty rather than
 * half-initialised, so the accessors of this class can always be called.
 *
 * @param st       security token, passed to the superclass
 * @param database database that was searched, passed to the superclass
 * @param query    query string, passed to the superclass
 * @param rs       search engine result set; may be null
 * @param numHits  upper bound handed to {@code rs.getResults(0, numHits)}
 * @param docCount number of docs searched, reported by {@link #getDocCount()}
 * @param t        transaction, passed to the superclass
 */
public NovaResultSet(SToken st, RDMDb database, String query,
        ResultSet rs, int numHits, int docCount, RDMTransaction t) {

    super(st, database, query, t);

    if (rs != null) {
        try {
            this.hits = rs.getResults(0, numHits);
            this.hitCount = rs.size();
        } catch (Exception e) {
            // Best effort: a failed read yields an empty result set, but the
            // cause is kept in the log record instead of being discarded.
            SearchLogger.getLogger().log(Level.WARNING,
                    "PSSH_CSPSB0031", e);
        }
    }

    // Without this guard a null rs (or a failed getResults) left hits null
    // and the size() call below threw a NullPointerException.
    if (this.hits == null) {
        this.hits = new ArrayList();
    }

    this.resultCount = this.hits.size();
    this.docCount = docCount;
}
043:
044: /**
045: * @return a SOIF hit, highlighted and filtered by view
046: */
047: public SOIF getResult(int i, Set view, String[] hltags) {
048: if (hits.size() == 0)
049: return null;
050: Result hit = (Result) hits.get(i);
051: SOIF s = highlight(hit, view, hltags);
052: return s;
053: }
054:
055: /**
056: * @return current number of hits in this set
057: */
058: // XXX do we really need this?
059: public int getResultCount() {
060: return resultCount;
061: }
062:
063: /**
064: * @return total number of hits for this search
065: */
066: public long getHitCount() {
067: return hitCount;
068: }
069:
070: /**
071: * @return number of docs searched
072: */
073: public long getDocCount() {
074: return docCount;
075: }
076:
077: /**
078: * @return number of docs in this result set
079: */
080: public java.lang.String toString() {
081: return super .toString(); // XXX
082: }
083:
084: // XXX not needed one taxdb is done
085: public Result getHit(int i) {
086: return (Result) hits.get(i);
087: }
088:
089: private void buildHighlightInfo(Result hit, SOIF s, Set attrs,
090: String[] hltags, int passageContext, int passageSize,
091: int maxPassages, String[] fldHltags, int fldPassageContext,
092: int fldPassageSize, int fldMaxPassages, int summaryFromBody) {
093:
094: Map m = null;
095:
096: if (hit.getNPassages() > 0) {
097:
098: // We have passages, and perhaps highlights...
099:
100: // XXX hardcoded constants
101: boolean includeHitTerms = attrs.contains("hit-terms");
102: boolean includeHighlightPositions = attrs
103: .contains("highlight-positions");
104: boolean includePassages = attrs.contains("passage");
105: boolean includePassageScores = attrs
106: .contains("passage-score");
107: boolean includePassageScoreInHighlight = false;
108: boolean includePassageCount = attrs
109: .contains("passage-count");
110: boolean highlightPassages = !includeHighlightPositions;
111: boolean highlightDescription = attrs
112: .contains("hl-description");
113:
114: boolean mvSort = false;
115: for (Iterator i = attrs.iterator(); i.hasNext();) {
116: String at = (String) i.next();
117: if (at.equalsIgnoreCase("hl-description"))
118: hit.addPassageField("NonField",
119: Passage.UNIQUE_PASSAGES, passageContext,
120: passageSize, true);
121: else if (at.regionMatches(true, 0, "hl-", 0, 3))
122: hit.addPassageField(at.substring(3),
123: Passage.JOIN_PASSAGES, fldPassageContext,
124: fldPassageSize, mvSort);
125: }
126:
127: m = hit.getPassages(NovaDb.soifToIndexableMap(s), true,
128: Passage.UNIQUE_PASSAGES, passageContext,
129: fldPassageSize, true);
130:
131: if (m != null) {
132:
133: for (Iterator p = m.entrySet().iterator(); p.hasNext();) {
134:
135: Map.Entry me = (Map.Entry) p.next();
136:
137: String fldname = (String) me.getKey();
138: List passages = (List) me.getValue();
139:
140: //if (fldname == null)
141: //continue;
142:
143: if (fldname == null
144: || fldname.equalsIgnoreCase("NonField")) {
145:
146: // Passages from the body and any unrequested fields
147:
148: StringBuffer newDesc = new StringBuffer();
149:
150: int j = 0;
151: for (Iterator q = passages.iterator(); j < maxPassages
152: && q.hasNext(); ++j) {
153:
154: Passage pass = (Passage) q.next();
155: pass.highlightPassage(null);
156: String passtext = pass.getHLValue(true);
157: int[] posns = pass
158: .getPassageWordPositions();
159:
160: if (highlightDescription
161: || highlightPassages) {
162:
163: // We should use the highlighter class here but it can't do html encoding...
164: StringBuffer sb = new StringBuffer();
165: int curpos = 0;
166: String piece = null;
167: sb.append(hltags[0]);
168: for (int k = 0; k < posns.length; k += 2) {
169: piece = passtext.substring(curpos,
170: posns[k]);
171: // XXX assumes html - not mobile friendly!
172: Encoder.htmlEncode(piece, sb);
173: if (k == 0)
174: sb.append(hltags[2]);
175: sb.append(hltags[4]);
176: piece = passtext.substring(
177: posns[k], posns[k + 1]);
178: Encoder.htmlEncode(piece, sb);
179: sb.append(hltags[5]);
180: if (k == (posns.length - 2))
181: sb.append(hltags[3]);
182: curpos = posns[k + 1];
183: }
184: piece = passtext.substring(curpos,
185: passtext.length());
186: Encoder.htmlEncode(piece, sb);
187: sb.append(hltags[1]);
188:
189: if (includePassages)
190: s.replace("passage",
191: highlightPassages ? sb
192: .toString()
193: : passtext, j);
194:
195: if (highlightDescription) {
196: if (includePassageScoreInHighlight)
197: newDesc.append("["
198: + scoreForm.format(pass
199: .getScore())
200: + "] "); // XXX HTML
201: newDesc.append(sb.toString());
202: if (j < (maxPassages - 1)
203: && q.hasNext())
204: newDesc.append("<br>"); // XXX HTML
205: }
206: }
207:
208: if (includePassageScores) {
209: s.replace("passage-score", scoreForm
210: .format(pass.getScore()), j);
211: }
212:
213: if (includeHighlightPositions) {
214: // The highlight offset info.
215: StringBuffer b1 = new StringBuffer();
216: for (int k = 0; k < posns.length; ++k) {
217: if (k > 0)
218: b1.append(",");
219: b1.append(posns[k]);
220: }
221: s.replace("highlight-positions", b1
222: .toString(), j);
223: }
224:
225: if (includeHitTerms) {
226: // The hit terms.
227: StringBuffer b = new StringBuffer();
228: String[] mt = pass.getMatchingTerms();
229: for (int k = 0; k < mt.length; k++) {
230: String hitTerm = mt[k];
231: if (hitTerm != null) {
232: if (k > 0)
233: b.append(",");
234: b.append(mt[k]);
235: }
236: }
237: s.replace("hit-terms", b.toString(), j);
238: }
239:
240: if (includePassageCount)
241: s.replace("passage-count", String
242: .valueOf(hit.getNPassages()));
243:
244: }
245:
246: if (highlightDescription
247: && newDesc.length() > 0)
248: s.replace("hl-description", newDesc
249: .toString());
250:
251: }
252:
253: else {
254:
255: // Passages from a requested field // XXX need to merge with above
256:
257: StringBuffer newDesc = new StringBuffer();
258:
259: int j = 0;
260: for (Iterator q = passages.iterator(); /*j < 4 &&*/q
261: .hasNext(); ++j) {
262:
263: Passage pass = (Passage) q.next();
264: pass.highlightPassage(null);
265: String passtext = pass.getHLValue(true);
266: int[] posns = pass
267: .getPassageWordPositions();
268:
269: if (highlightDescription
270: || highlightPassages) {
271:
272: // We should use the highlighter class here but it can't do html encoding...
273: StringBuffer sb = new StringBuffer();
274: int curpos = 0;
275: String piece = null;
276: sb.append(fldHltags[0]);
277: for (int k = 0; k < posns.length; k += 2) {
278: piece = passtext.substring(curpos,
279: posns[k]);
280: Encoder.htmlEncode(piece, sb);
281: if (k == 0)
282: sb.append(fldHltags[2]);
283: sb.append(fldHltags[4]);
284: piece = passtext.substring(
285: posns[k], posns[k + 1]);
286: Encoder.htmlEncode(piece, sb);
287: sb.append(fldHltags[5]);
288: if (k == (posns.length - 2))
289: sb.append(fldHltags[3]);
290: curpos = posns[k + 1];
291: }
292: piece = passtext.substring(curpos,
293: passtext.length());
294: Encoder.htmlEncode(piece, sb);
295: sb.append(fldHltags[1]);
296:
297: if (includePassages)
298: s.replace("passage-" + fldname,
299: highlightPassages ? sb
300: .toString()
301: : passtext, j);
302:
303: if (highlightDescription) {
304: if (includePassageScoreInHighlight)
305: newDesc.append("["
306: + scoreForm.format(pass
307: .getScore())
308: + "] "); // XXX HTML
309: newDesc.append(sb.toString());
310: if (j < (fldMaxPassages - 1)
311: && q.hasNext())
312: newDesc.append("<br>"); // XXX HTML
313: }
314: }
315:
316: if (includePassageScores) {
317: s.replace("passage-score-" + fldname,
318: scoreForm.format(pass
319: .getScore()), j);
320: }
321:
322: if (includeHighlightPositions) {
323: // The highlight offset info.
324: StringBuffer b1 = new StringBuffer();
325: for (int k = 0; k < posns.length; ++k) {
326: if (k > 0)
327: b1.append(",");
328: b1.append(posns[k]);
329: }
330: s.replace("highlight-positions-"
331: + fldname, b1.toString(), j);
332: }
333:
334: if (includeHitTerms) {
335: // The hit terms.
336: StringBuffer b = new StringBuffer();
337: String[] mt = pass.getMatchingTerms();
338: for (int k = 0; k < mt.length; k++) {
339: String hitTerm = mt[k];
340: if (hitTerm != null) {
341: if (k > 0)
342: b.append(",");
343: b.append(mt[k]);
344: }
345: }
346: s.replace("hit-terms-" + fldname, b
347: .toString(), j);
348: }
349:
350: if (includePassageCount)
351: s.replace("passage-count-" + fldname,
352: String.valueOf(hit
353: .getNPassages()));
354:
355: if (highlightDescription
356: && newDesc.length() > 0)
357: s.replace("hl-" + fldname, newDesc
358: .toString(), j);
359: newDesc.setLength(0);
360:
361: }
362:
363: } // for all fields
364:
365: } // for all passages
366:
367: } // if passage map non-empty - if (m != null)
368:
369: } // if passage count > 0
370:
371: // Check for missing highlights - rename or copy them from the raw values as needed...
372: if (m == null || attrs.contains("hl-description")
373: && !s.contains("hl-description")) {
374: // No passages, eg, * query. Or no body passages, eg, a hit only in a hl'd field.
375: for (Iterator i = attrs.iterator(); i.hasNext();) {
376: String hl = (String) i.next();
377: if (hl.regionMatches(true, 0, "hl-", 0, 3)) {
378: if (s.contains(hl))
379: continue; // we already have the hl
380: String nonhl = hl.substring(3);
381:
382: if (nonhl.equalsIgnoreCase("url")) {
383: // url is special case - not a real attribute (perhaps should be...)
384: s.replace(hl, s.getURL());
385: continue;
386: }
387:
388: AVPair av = s.getAVPair(nonhl);
389: if (av == null)
390: continue; // there is no raw value
391: if (attrs.contains(nonhl)) {
392: // Trouble if client wants nonhl and hl.
393: // We have to resort to a clumsy AVPair clone (string only). XXX
394: AVPair av1 = new AVPair(hl);
395: for (int ix = 0; ix <= av.getMaxIndex(); ++ix)
396: av1.insert(av.getValue(ix), ix);
397: s.replace(av1);
398: } else
399: s.rename(nonhl, hl);
400: }
401: }
402: }
403:
404: // If no highlight summary and no description, generate a summary from body prefix
405: if (summaryFromBody > 0
406: && s.getValue("summary") == null
407: && (!attrs.contains("description") || s
408: .getValue("description") == null)) {
409: String pt = s.getValue("partial-text");
410: if (pt != null)
411: s.replace("description", pt.substring(0,
412: summaryFromBody)
413: + "...");
414: }
415:
416: }
417:
418: private SOIF highlight(Result hit, Set view, String[] hltags) {
419:
420: if (!hlInited) {
421: synchronized (this .getClass()) {
422: hlInit();
423: hlInited = true;
424: }
425: }
426:
427: boolean highlight = false;
428:
429: // Use hl-description view-attr to trigger highlights
430: if (view != null && view.contains("hl-description"))
431: highlight = true;
432:
433: boolean doHighlights = highlightsEnabled && highlight;
434:
435: // Fetch database RD by its key
436: String dbkeyval = (String) hit.getField(dbkey);
437: if (dbkeyval == null) {
438: SearchLogger.getLogger().log(Level.WARNING,
439: "PSSH_CSPSB0032");
440: return null; // XXX - exception?
441: }
442:
443: Set dbview = doHighlights ? null : view; // need complete, original SOIF for highlighting
444: SOIF s = null;
445: try {
446: s = getDb().fetch(getSToken(), dbkeyval, dbview, 0,
447: getTransaction());
448: } catch (RDMException e) {
449: SearchLogger.getLogger().log(Level.WARNING,
450: "PSSH_CSPSB0033", e);
451: }
452: if (s == null) {
453: SearchLogger.getLogger().log(Level.WARNING,
454: "PSSH_CSPSB0034", dbkeyval);
455: return null; // XXX - exception?
456: }
457:
458: String[] highlightTags = (String[]) defaultHighlightTags
459: .clone();
460: String[] fldHighlightTags = (String[]) defaultFldHighlightTags
461: .clone();
462:
463: if (hltags != null && hltags.length >= 6)
464: System.arraycopy(hltags, 0, highlightTags, 0, 6);
465: if (hltags != null && hltags.length == 12)
466: System.arraycopy(hltags, 6, fldHighlightTags, 0, 6);
467:
468: if (doHighlights)
469: buildHighlightInfo(hit, s, view, highlightTags,
470: defaultPassageContext, defaultPassageSize,
471: defaultMaxPassages, fldHighlightTags,
472: defaultFldPassageContext, defaultFldPassageSize,
473: defaultFldMaxPassages, defaultSummaryFromBody);
474:
475: if (view != null) {
476: if (doHighlights) {
477: // remove non-view attrs since we had to fetch the whole RD from the db to highlight it
478: for (Iterator it = s.keySet().iterator(); it.hasNext();) {
479: if (!view.contains(it.next()))
480: it.remove();
481: }
482: }
483: }
484:
485: // XXX it may be better to only return the score if requested
486: if (view == null || view.contains("score"))
487: s.replace("score", ""
488: + Math.round((hit.getScore() * 100.0f)));
489:
490: return s;
491:
492: }
493:
494: static void hlInit() {
495: // set up highlighting
496: String p = SearchConfig.getValue(SearchConfig.HIGHLIGHTS);
497: if (p != null)
498: highlightsEnabled = p.equalsIgnoreCase("true");
499:
500: p = SearchConfig.getValue(SearchConfig.HIGHLIGHT_TAGS);
501: if (p != null) {
502: String[] tags = String2Array.string2Array(p, ',');
503: if (tags != null && tags.length == 6)
504: defaultHighlightTags = tags;
505: }
506:
507: p = SearchConfig.getValue(SearchConfig.PASSAGE_CONTEXT);
508: if (p != null) {
509: int cnt = Integer.parseInt(p);
510: if (cnt > 0)
511: defaultPassageContext = cnt;
512: }
513:
514: p = SearchConfig.getValue(SearchConfig.PASSAGE_SIZE);
515: if (p != null) {
516: int cnt = Integer.parseInt(p);
517: if (cnt > 0)
518: defaultPassageSize = cnt;
519: }
520:
521: p = SearchConfig.getValue(SearchConfig.MAX_PASSAGES);
522: if (p != null) {
523: int cnt = Integer.parseInt(p);
524: if (cnt > 0)
525: defaultMaxPassages = cnt;
526: }
527:
528: p = SearchConfig.getValue(SearchConfig.HIGHLIGHT_TAGS_FIELD);
529: if (p != null) {
530: String[] tags = String2Array.string2Array(p, ',');
531: if (tags != null && tags.length == 6)
532: defaultFldHighlightTags = tags;
533: }
534:
535: p = SearchConfig.getValue(SearchConfig.PASSAGE_CONTEXT_FIELD);
536: if (p != null) {
537: int cnt = Integer.parseInt(p);
538: if (cnt > 0)
539: defaultFldPassageContext = cnt;
540: }
541:
542: p = SearchConfig.getValue(SearchConfig.PASSAGE_SIZE_FIELD);
543: if (p != null) {
544: int cnt = Integer.parseInt(p);
545: if (cnt > 0)
546: defaultFldPassageSize = cnt;
547: }
548:
549: p = SearchConfig.getValue(SearchConfig.MAX_PASSAGES_FIELD);
550: if (p != null) {
551: int cnt = Integer.parseInt(p);
552: if (cnt > 0)
553: defaultFldMaxPassages = cnt;
554: }
555:
556: p = SearchConfig.getValue(SearchConfig.SUMMARY_FROM_BODY);
557: if (p != null) {
558: try {
559: int cnt = Integer.parseInt(p);
560: if (cnt > 0)
561: defaultSummaryFromBody = cnt;
562: } catch (Exception e) {
563: Logger debugLogger1 = SearchLogger.getLogger();
564: debugLogger1.log(Level.WARNING, "PSSH_CSPSB0035",
565: SearchConfig.SUMMARY_FROM_BODY);
566: }
567: }
568: }
569:
570: static DecimalFormat scoreForm = new DecimalFormat("###0.000");
571:
572: protected String dbkey = "URL"; // used for db lookups
573:
574: protected int resultCount; // total number of results in this set (limited to int)
575: protected long hitCount; // total number of hits for this query
576: protected long docCount; // total number of docs searched for this query
577: protected List hits; // Nova hit array
578:
579: // XXX these need to be extracted from view!
580: // XXX move to RDMResultSet? RDMView?
581: static String[] defaultHighlightTags = { "... ", " ...",
582: "<font color=bb0033>", "</font>", "<b>", "</b>" };
583: static int defaultMaxPassages = 3;
584: static int defaultPassageContext = 6; // words
585: static int defaultPassageSize = 500; // chars
586: static String[] defaultFldHighlightTags = { "", "", "", "", "<b>",
587: "</b>" };
588: static int defaultFldMaxPassages = 1;
589: static int defaultFldPassageContext = -1; // words
590: static int defaultFldPassageSize = 150; // chars
591: static int defaultSummaryFromBody = 0; // don't generate a description from the body
592: static boolean highlightsEnabled = true;
593: static boolean hlInited = false; // highlight initialisation
594:
595: }
|