001: package org.contineo.core.searchengine.comparision;
002:
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.contineo.core.document.Term;
import org.contineo.core.document.dao.TermDAO;
import org.contineo.core.security.Menu;
import org.contineo.core.security.dao.MenuDAO;
import org.contineo.util.Context;
015:
016: /**
017: * Class for finding similar documents. Created on 21.03.2004
018: *
019: * @author Michael Scholz
020: */
021: public class Searcher {
022:
023: public Searcher() {
024: }
025:
026: /**
027: * This method finds documents, which are similar to a reference document.
028: * All documents are valued by dice-coefficient. dice-coefficient = 2*scalar
029: * product (doc1,doc2) / (absolute value(doc1) + absoulute value(doc2))
030: *
031: * @param docId - ID of the reference document.
032: * @param minScore - Minimum score value (between 0 and 1)
033: * @return Collection of similar documents sorted by score value.
034: */
035: public Collection findSimilarDocuments(int menuId, double minScore,
036: String username) {
037: TermDAO termsDao = (TermDAO) Context.getInstance().getBean(
038: TermDAO.class);
039: Collection basicTerms = termsDao.findByMenuId(menuId);
040:
041: // select all documents having a keyword a the basic document
042: Collection<Term> terms = termsDao.findByStem(menuId);
043: Collection<SearchResult> result = new ArrayList<SearchResult>();
044: Iterator iter = terms.iterator();
045: MenuDAO mdao = (MenuDAO) Context.getInstance().getBean(
046: MenuDAO.class);
047: Collection<Integer> coll2 = mdao.findMenuIdByUserName(username);
048:
049: while (iter.hasNext()) {
050: // calculate the score for ranking
051: Term term = (Term) iter.next();
052:
053: if (coll2.contains(new Integer(term.getMenuId()))) {
054: Collection docTerms = termsDao.findByMenuId(term
055: .getMenuId());
056: double score = calculateScore(basicTerms, docTerms);
057:
058: if (score >= minScore) {
059: SearchResult sres = new SearchResult();
060: Menu menu = mdao.findByPrimaryKey(term.getMenuId());
061: sres.setScore(score);
062: sres.setIcon(menu.getMenuIcon());
063: sres.setMenuId(menu.getMenuId());
064: sres.setName(menu.getMenuText());
065: sres.setPath(menu.getMenuPath());
066: if (!result.contains(sres))
067: result.add(sres);
068: }
069: }
070: }
071:
072: Collections.sort((List<SearchResult>) result,
073: new SearchResultComparator());
074: return result;
075: }
076:
077: private double calculateScore(Collection refTerms, Collection terms) {
078: double score = 0.0d;
079: double abs1 = 0.0d;
080: double abs2 = 0.0d;
081: Hashtable table = convert(terms);
082: Iterator iter = refTerms.iterator();
083:
084: while (iter.hasNext()) {
085: Term term = (Term) iter.next();
086: abs1 += term.getValue() * term.getValue();
087:
088: if (table.containsKey(term.getStem())) {
089: Double value = (Double) table.get(term.getStem());
090: abs2 += value.doubleValue() * value.doubleValue();
091: score += value.doubleValue() * term.getValue();
092: }
093: }
094:
095: return (2 * score) / (abs1 + abs2);
096: }
097:
098: private Hashtable convert(Collection coll) {
099: Hashtable<String, Double> table = new Hashtable<String, Double>(
100: coll.size());
101: Iterator iter = coll.iterator();
102:
103: while (iter.hasNext()) {
104: Term term = (Term) iter.next();
105: table.put(term.getStem(), new Double(term.getValue()));
106: }
107:
108: return table;
109: }
110: }
|