001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. The ASF licenses this file to You
004: * under the Apache License, Version 2.0 (the "License"); you may not
005: * use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License. For additional information regarding
015: * copyright in this work, please see the NOTICE file in the top level
016: * directory of this distribution.
017: */
018:
019: package org.apache.roller.business.hibernate;
020:
021: import java.util.ArrayList;
022: import java.util.Calendar;
023: import java.util.Date;
024: import java.util.Iterator;
025: import java.util.List;
026: import org.apache.commons.lang.StringUtils;
027: import org.apache.commons.logging.Log;
028: import org.apache.commons.logging.LogFactory;
029: import org.hibernate.Criteria;
030: import org.hibernate.Hibernate;
031: import org.hibernate.HibernateException;
032: import org.hibernate.Query;
033: import org.hibernate.Session;
034: import org.hibernate.criterion.Expression;
035: import org.hibernate.criterion.Junction;
036: import org.hibernate.criterion.Order;
037: import org.hibernate.type.Type;
038: import org.apache.roller.RollerException;
039: import org.apache.roller.config.RollerRuntimeConfig;
040: import org.apache.roller.business.referrers.RefererManager;
041: import org.apache.roller.pojos.RefererData;
042: import org.apache.roller.pojos.WeblogEntryData;
043: import org.apache.roller.pojos.WebsiteData;
044: import org.apache.roller.pojos.WebsiteDisplayData;
045: import org.hibernate.dialect.OracleDialect;
046: import org.hibernate.dialect.SQLServerDialect;
047: import org.hibernate.engine.SessionFactoryImplementor;
048: import org.hibernate.dialect.Dialect;
049: import org.apache.roller.business.Roller;
050: import org.apache.roller.business.RollerFactory;
051: import org.apache.roller.business.UserManager;
052: import org.apache.roller.business.WeblogManager;
053: import org.apache.roller.pojos.StatCount;
054: import org.apache.roller.util.DateUtil;
055: import org.apache.roller.util.LinkbackExtractor;
056: import org.apache.roller.util.Utilities;
057:
058: /**
059: * Hibernate implementation of the RefererManager.
060: */
061: public class HibernateRefererManagerImpl implements RefererManager {
062:
063: static final long serialVersionUID = -4966091850482256435L;
064:
065: private static Log log = LogFactory
066: .getLog(HibernateRefererManagerImpl.class);
067:
068: protected static final String DAYHITS = "dayHits";
069: protected static final String TOTALHITS = "totalHits";
070:
071: private HibernatePersistenceStrategy strategy = null;
072: private Date mRefDate = new Date();
073:
/**
 * Creates a referer manager that performs all persistence work through
 * the supplied Hibernate persistence strategy.
 *
 * @param strat strategy used to obtain sessions and to store, load and
 *              remove persistent objects
 */
public HibernateRefererManagerImpl(HibernatePersistenceStrategy strat) {
    log.debug("Instantiating Hibernate Referer Manager");
    this.strategy = strat;
}
081:
082: public void saveReferer(RefererData referer) throws RollerException {
083: strategy.store(referer);
084: }
085:
086: public void removeReferer(RefererData referer)
087: throws RollerException {
088: strategy.remove(referer);
089: }
090:
091: /**
092: * Clear referrer dayhits and remove referrers without excerpts.
093: *
094: * TODO: do we really need dialect specific queries?
095: */
096: public void clearReferrers() throws RollerException {
097:
098: if (log.isDebugEnabled()) {
099: log.debug("clearReferrers");
100: }
101: try {
102: Session session = ((HibernatePersistenceStrategy) strategy)
103: .getSession();
104: Dialect currentDialect = ((SessionFactoryImplementor) session
105: .getSessionFactory()).getDialect();
106: String reset = "update RefererData set dayHits=0";
107: session.createQuery(reset).executeUpdate();
108: String delete = null;
109: if (currentDialect instanceof SQLServerDialect
110: || currentDialect instanceof OracleDialect) {
111: delete = "delete RefererData where excerpt is null or excerpt like ''";
112: } else {
113: delete = "delete RefererData where excerpt is null or excerpt=''";
114: }
115: session.createQuery(delete).executeUpdate();
116: } catch (Exception e) {
117: log.error("EXCEPTION resetting referers", e);
118: }
119: }
120:
121: /**
122: * Clear referrer dayhits and remove referrers without excerpts.
123: *
124: * TODO: do we really need dialect specific queries?
125: */
126: public void clearReferrers(WebsiteData website)
127: throws RollerException {
128:
129: if (log.isDebugEnabled()) {
130: log.debug("clearReferrers");
131: }
132: try {
133: Session session = ((HibernatePersistenceStrategy) strategy)
134: .getSession();
135: Dialect currentDialect = ((SessionFactoryImplementor) session
136: .getSessionFactory()).getDialect();
137: String reset = "update RefererData set dayHits=0 where website=:site";
138: session.createQuery(reset).setParameter("site", website)
139: .executeUpdate();
140: String delete = null;
141: if (currentDialect instanceof SQLServerDialect
142: || currentDialect instanceof OracleDialect) {
143: delete = "delete RefererData where website=:site and (excerpt is null or excerpt like '')";
144: } else {
145: delete = "delete RefererData where website=:site and (excerpt is null or excerpt='')";
146: }
147: session.createQuery(delete).setParameter("site", website)
148: .executeUpdate();
149: } catch (Exception e) {
150: log.error("EXCEPTION resetting referers", e);
151: }
152: }
153:
154: /**
155: * Apply ignoreWord/spam filters to all referers in system.
156: */
157: public void applyRefererFilters() throws RollerException {
158:
159: try {
160: Session session = ((HibernatePersistenceStrategy) strategy)
161: .getSession();
162: Criteria criteria = session
163: .createCriteria(RefererData.class);
164:
165: String spamwords = RollerRuntimeConfig
166: .getProperty("spam.blacklist");
167:
168: String[] blacklist = StringUtils.split(StringUtils
169: .deleteWhitespace(spamwords), ",");
170: Junction or = Expression.disjunction();
171: for (int i = 0; i < blacklist.length; i++) {
172: String ignoreWord = blacklist[i].trim();
173: //log.debug("including ignore word - "+ignoreWord);
174: or.add(Expression.ilike("refererUrl", "%" + ignoreWord
175: + "%"));
176: }
177: criteria.add(Expression.conjunction().add(
178: Expression.disjunction().add(
179: Expression.isNull("excerpt")).add(
180: Expression.eq("excerpt", ""))).add(or));
181:
182: log.debug("removing spam referers - "
183: + criteria.list().size());
184:
185: Iterator referer = criteria.list().iterator();
186: while (referer.hasNext()) {
187: this .strategy.remove((RefererData) referer.next());
188: }
189:
190: } catch (HibernateException e) {
191: throw new RollerException(e);
192: }
193: }
194:
195: /**
196: * Apply ignoreWord/spam filters to all referers in website.
197: */
198: public void applyRefererFilters(WebsiteData website)
199: throws RollerException {
200:
201: if (null == website)
202: throw new RollerException("website is null");
203: if (null == website.getBlacklist())
204: return;
205:
206: try {
207: Session session = ((HibernatePersistenceStrategy) strategy)
208: .getSession();
209: Criteria criteria = session
210: .createCriteria(RefererData.class);
211:
212: String[] blacklist = StringUtils.split(StringUtils
213: .deleteWhitespace(website.getBlacklist()), ",");
214: if (blacklist.length == 0)
215: return;
216:
217: Junction or = Expression.disjunction();
218: for (int i = 0; i < blacklist.length; i++) {
219: String ignoreWord = blacklist[i].trim();
220: or.add(Expression.ilike("refererUrl", "%" + ignoreWord
221: + "%"));
222: }
223: criteria.add(Expression.conjunction().add(
224: Expression.disjunction().add(
225: Expression.isNull("excerpt")).add(
226: Expression.eq("excerpt", ""))).add(
227: Expression.eq("website", website)).add(or));
228:
229: Iterator referer = criteria.list().iterator();
230: while (referer.hasNext()) {
231: this .strategy.remove((RefererData) referer.next());
232: }
233:
234: } catch (HibernateException e) {
235: throw new RollerException(e);
236: }
237: }
238:
239: /**
240: * Use Hibernate directly because Roller's Query API does too much allocation.
241: */
242: protected List getExistingReferers(WebsiteData website,
243: String dateString, String permalink) throws RollerException {
244:
245: try {
246: Session session = ((HibernatePersistenceStrategy) strategy)
247: .getSession();
248: Criteria criteria = session
249: .createCriteria(RefererData.class);
250: criteria.add(Expression.conjunction().add(
251: Expression.eq("website", website)).add(
252: Expression.eq("dateString", dateString)).add(
253: Expression.eq("refererPermalink", permalink)));
254:
255: return criteria.list();
256: } catch (HibernateException e) {
257: throw new RollerException(e);
258: }
259: }
260:
261: /**
262: * Use Hibernate directly because Roller's Query API does too much allocation.
263: */
264: protected List getMatchingReferers(WebsiteData website,
265: String requestUrl, String refererUrl)
266: throws RollerException {
267:
268: try {
269: Session session = ((HibernatePersistenceStrategy) strategy)
270: .getSession();
271: Criteria criteria = session
272: .createCriteria(RefererData.class);
273: criteria.add(Expression.conjunction().add(
274: Expression.eq("website", website)).add(
275: Expression.eq("requestUrl", requestUrl)).add(
276: Expression.eq("refererUrl", refererUrl)));
277:
278: return criteria.list();
279: } catch (HibernateException e) {
280: throw new RollerException(e);
281: }
282: }
283:
284: /**
285: * Returns hot weblogs as StatCount objects, in descending order by today's hits.
286: */
287: public List getHotWeblogs(int sinceDays, int offset, int length)
288: throws RollerException {
289: // TODO: ATLAS getDaysPopularWebsites DONE TESTED
290: String msg = "Getting hot weblogs";
291: ArrayList result = new ArrayList();
292: Calendar cal = Calendar.getInstance();
293: cal.setTime(new Date());
294: cal.add(Calendar.DATE, -1 * sinceDays);
295: Date startDate = cal.getTime();
296: try {
297: Session session = ((HibernatePersistenceStrategy) strategy)
298: .getSession();
299: Query query = session
300: .createQuery("select sum(r.dayHits) as s, w.id, w.name, w.handle "
301: + "from WebsiteData w, RefererData r "
302: + "where r.website=w and w.enabled=true and w.active=true and w.lastModified > :startDate "
303: + "group by w.name, w.handle, w.id order by col_0_0_ desc");
304: query.setParameter("startDate", startDate);
305:
306: // +"group by w.name, w.handle, w.id order by s desc");
307: // The above would be *much* better but "HQL parser does not
308: // resolve alias in ORDER BY clause" (See Hibernate issue HHH-892)
309:
310: if (offset != 0) {
311: query.setFirstResult(offset);
312: }
313: if (length != -1) {
314: query.setMaxResults(length);
315: }
316: Iterator rawResults = query.list().iterator();
317: for (Iterator it = query.list().iterator(); it.hasNext();) {
318: Object[] row = (Object[]) it.next();
319: Number hits = (Number) row[0];
320: String websiteId = (String) row[1];
321: String websiteName = (String) row[2];
322: String websiteHandle = (String) row[3];
323: result.add(new StatCount(websiteId, websiteHandle,
324: websiteName, "statCount.weblogDayHits", hits
325: .longValue()));
326: }
327: return result;
328:
329: } catch (Throwable pe) {
330: log.error(msg, pe);
331: throw new RollerException(msg, pe);
332: }
333: }
334:
335: /**
336: * @deprecated Replaced by getHotWeblogs().
337: */
338: public List getDaysPopularWebsites(int offset, int length)
339: throws RollerException {
340: // TODO: ATLAS getDaysPopularWebsites DONE TESTED
341: String msg = "Getting popular websites";
342: ArrayList result = new ArrayList();
343: try {
344: Session session = ((HibernatePersistenceStrategy) strategy)
345: .getSession();
346: Query query = session
347: .createQuery("select sum(r.dayHits) as s, w.id, w.name, w.handle "
348: + "from WebsiteData w, RefererData r "
349: + "where r.website=w and w.enabled=true and w.active=true "
350: + "group by w.name, w.handle, w.id order by col_0_0_ desc");
351:
352: // +"group by w.name, w.handle, w.id order by s desc");
353: // The above would be *much* better but "HQL parser does not
354: // resolve alias in ORDER BY clause" (See Hibernate issue HHH-892)
355:
356: if (offset != 0) {
357: query.setFirstResult(offset);
358: }
359: if (length != -1) {
360: query.setMaxResults(length);
361: }
362: Iterator rawResults = query.list().iterator();
363: for (Iterator it = query.list().iterator(); it.hasNext();) {
364: Object[] row = (Object[]) it.next();
365: Number hits = (Number) row[0];
366: String websiteId = (String) row[1];
367: String websiteName = (String) row[2];
368: String websiteHandle = (String) row[3];
369: result.add(new WebsiteDisplayData(websiteId,
370: websiteName, websiteHandle, new Integer(hits
371: .intValue())));
372: }
373: return result;
374:
375: } catch (Throwable pe) {
376: log.error(msg, pe);
377: throw new RollerException(msg, pe);
378: }
379: }
380:
381: /**
382: * Use raw SQL because Hibernate can't handle the query.
383: */
384: protected int getHits(WebsiteData website, String type)
385: throws RollerException {
386: int hits = 0;
387: if (log.isDebugEnabled()) {
388: log.debug("getHits: " + website.getName());
389: }
390:
391: Object[] args = { Boolean.TRUE, website.getId() };
392: Type[] types = { Hibernate.BOOLEAN, Hibernate.STRING };
393:
394: // For a query like this, Hibernate returns a list of lists
395: Session session = ((HibernatePersistenceStrategy) strategy)
396: .getSession();
397: List results;
398: try {
399: // begin transaction
400: this .strategy.getSession().beginTransaction();
401:
402: Query q = session
403: .createQuery("select sum(h.dayHits),sum(h.totalHits) from h in class "
404: + "org.apache.roller.pojos.RefererData "
405: + "where h.website.enabled=? and h.website.id=? ");
406: q.setParameters(args, types);
407: results = q.list();
408: } catch (HibernateException e) {
409: throw new RollerException(e);
410: }
411: Object[] resultsArray = (Object[]) results.get(0);
412:
413: if (resultsArray.length > 0 && type.equals(DAYHITS)) {
414: if (resultsArray[0] != null) {
415: hits = ((Number) resultsArray[0]).intValue();
416: }
417: } else if (resultsArray.length > 0) {
418: if (resultsArray[0] != null) {
419: hits = ((Number) resultsArray[1]).intValue();
420: }
421: } else {
422: hits = 0;
423: }
424:
425: return hits;
426: }
427:
428: /**
429: * @see org.apache.roller.pojos.RefererManager#getReferers(java.lang.String)
430: */
431: public List getReferers(WebsiteData website) throws RollerException {
432: if (website == null)
433: throw new RollerException("website is null");
434:
435: try {
436: Session session = ((HibernatePersistenceStrategy) strategy)
437: .getSession();
438: Criteria criteria = session
439: .createCriteria(RefererData.class);
440: criteria.add(Expression.eq("website", website));
441: criteria.addOrder(Order.desc("totalHits"));
442:
443: return criteria.list();
444: } catch (HibernateException e) {
445: throw new RollerException(e);
446: }
447: }
448:
449: /**
450: * @see org.apache.roller.pojos.RefererManager#getTodaysReferers(String)
451: */
452: public List getTodaysReferers(WebsiteData website)
453: throws RollerException {
454: if (website == null)
455: throw new RollerException("website is null");
456:
457: try {
458: Session session = ((HibernatePersistenceStrategy) strategy)
459: .getSession();
460: Criteria criteria = session
461: .createCriteria(RefererData.class);
462: criteria.add(Expression.eq("website", website));
463: criteria.add(Expression.gt("dayHits", new Integer(0)));
464: criteria.addOrder(Order.desc("dayHits"));
465:
466: return criteria.list();
467: } catch (HibernateException e) {
468: throw new RollerException(e);
469: }
470: }
471:
472: /**
473: * Returns referers for a specified day. Duplicate enties are not
474: * included in this list so the hit counts may not be accurate.
475: * @see org.apache.roller.pojos.RefererManager#getReferersToDate(
476: * org.apache.roller.pojos.WebsiteData, java.lang.String)
477: */
478: public List getReferersToDate(WebsiteData website, String date)
479: throws RollerException {
480: if (website == null)
481: throw new RollerException("website is null");
482:
483: if (date == null)
484: throw new RollerException("Date is null");
485:
486: try {
487: Session session = ((HibernatePersistenceStrategy) strategy)
488: .getSession();
489: Criteria criteria = session
490: .createCriteria(RefererData.class);
491: criteria.add(Expression.eq("website", website));
492: criteria.add(Expression.eq("dateString", date));
493: criteria.add(Expression.eq("duplicate", Boolean.FALSE));
494: criteria.addOrder(Order.desc("totalHits"));
495:
496: return criteria.list();
497: } catch (HibernateException e) {
498: throw new RollerException(e);
499: }
500: }
501:
502: /**
503: * @see org.apache.roller.pojos.RefererManager#getReferersToEntry(
504: * java.lang.String, java.lang.String)
505: */
506: public List getReferersToEntry(String entryid)
507: throws RollerException {
508: if (null == entryid)
509: throw new RollerException("entryid is null");
510:
511: try {
512: Session session = ((HibernatePersistenceStrategy) strategy)
513: .getSession();
514: Criteria criteria = session
515: .createCriteria(RefererData.class);
516: criteria.createAlias("weblogEntry", "e");
517:
518: criteria.add(Expression.eq("e.id", entryid));
519: criteria.add(Expression.isNotNull("title"));
520: criteria.add(Expression.isNotNull("excerpt"));
521:
522: criteria.addOrder(Order.desc("totalHits"));
523:
524: return criteria.list();
525: } catch (HibernateException e) {
526: throw new RollerException(e);
527: }
528: }
529:
530: /**
531: * Query for collection of referers.
532: */
533: protected List getReferersToWebsite(WebsiteData website,
534: String refererUrl) throws RollerException {
535:
536: try {
537: Session session = ((HibernatePersistenceStrategy) strategy)
538: .getSession();
539: Criteria criteria = session
540: .createCriteria(RefererData.class);
541: criteria.add(Expression.eq("website", website));
542: criteria.add(Expression.eq("refererUrl", refererUrl));
543:
544: return criteria.list();
545: } catch (HibernateException e) {
546: throw new RollerException(e);
547: }
548: }
549:
550: /**
551: * Query for collection of referers.
552: */
553: protected List getReferersWithSameTitle(WebsiteData website,
554: String requestUrl, String title, String excerpt)
555: throws RollerException {
556:
557: try {
558: Session session = ((HibernatePersistenceStrategy) strategy)
559: .getSession();
560: Criteria criteria = session
561: .createCriteria(RefererData.class);
562:
563: Junction conjunction = Expression.conjunction();
564: conjunction.add(Expression.eq("website", website));
565: conjunction.add(Expression.eq("requestUrl", requestUrl));
566:
567: Junction disjunction = Expression.conjunction();
568: disjunction.add(Expression.eq("title", title));
569: disjunction.add(Expression.eq("excerpt", excerpt));
570:
571: criteria.add(conjunction);
572: criteria.add(disjunction);
573:
574: return criteria.list();
575: } catch (HibernateException e) {
576: throw new RollerException(e);
577: }
578: }
579:
580: public int getDayHits(WebsiteData website) throws RollerException {
581: return getHits(website, DAYHITS);
582: }
583:
584: public int getTotalHits(WebsiteData website) throws RollerException {
585: return getHits(website, TOTALHITS);
586: }
587:
588: /**
589: * @see org.apache.roller.pojos.RefererManager#retrieveReferer(java.lang.String)
590: */
591: public RefererData getReferer(String id) throws RollerException {
592: return (RefererData) strategy.load(id, RefererData.class);
593: }
594:
595: public void processReferrer(String requestUrl, String referrerUrl,
596: String weblogHandle, String entryAnchor, String dateString) {
597:
598: log.debug("processing referrer [" + referrerUrl
599: + "] accessing [" + requestUrl + "]");
600:
601: if (weblogHandle == null)
602: return;
603:
604: String selfSiteFragment = "/" + weblogHandle;
605: WebsiteData weblog = null;
606: WeblogEntryData entry = null;
607:
608: // lookup the weblog now
609: try {
610: UserManager userMgr = RollerFactory.getRoller()
611: .getUserManager();
612: weblog = userMgr.getWebsiteByHandle(weblogHandle);
613: if (weblog == null)
614: return;
615:
616: // now lookup weblog entry if possible
617: if (entryAnchor != null) {
618: WeblogManager weblogMgr = RollerFactory.getRoller()
619: .getWeblogManager();
620: entry = weblogMgr.getWeblogEntryByAnchor(weblog,
621: entryAnchor);
622: }
623: } catch (RollerException re) {
624: // problem looking up website, gotta bail
625: log.error("Error looking up website object", re);
626: return;
627: }
628:
629: try {
630: List matchRef = null;
631:
632: // try to find existing RefererData for referrerUrl
633: if (referrerUrl == null || referrerUrl.trim().length() < 8) {
634: referrerUrl = "direct";
635:
636: // Get referer specified by referer URL of direct
637: matchRef = getReferersToWebsite(weblog, referrerUrl);
638: } else {
639: referrerUrl = Utilities.stripJsessionId(referrerUrl);
640:
641: // Query for referer with same referer and request URLs
642: matchRef = getMatchingReferers(weblog, requestUrl,
643: referrerUrl);
644:
645: // If referer was not found, try adding or leaving off 'www'
646: if (matchRef.size() == 0) {
647: String secondTryUrl = null;
648: if (referrerUrl.startsWith("http://www")) {
649: secondTryUrl = "http://"
650: + referrerUrl.substring(11);
651: } else {
652: secondTryUrl = "http://www"
653: + referrerUrl.substring(7);
654: }
655:
656: matchRef = getMatchingReferers(weblog, requestUrl,
657: secondTryUrl);
658: if (matchRef.size() == 1) {
659: referrerUrl = secondTryUrl;
660: }
661: }
662: }
663:
664: if (matchRef.size() == 1) {
665: // Referer was found in database, so bump up hit count
666: RefererData ref = (RefererData) matchRef.get(0);
667:
668: ref.setDayHits(new Integer(
669: ref.getDayHits().intValue() + 1));
670: ref.setTotalHits(new Integer(ref.getTotalHits()
671: .intValue() + 1));
672:
673: log
674: .debug("Incrementing hit count on existing referer: "
675: + referrerUrl);
676:
677: saveReferer(ref);
678:
679: } else if (matchRef.size() == 0) {
680:
681: // Referer was not found in database, so new Referer object
682: Integer one = new Integer(1);
683: RefererData ref = new RefererData(null, weblog, entry,
684: dateString, referrerUrl, null, requestUrl,
685: null, "", // Read comment above regarding Derby bug
686: Boolean.FALSE, Boolean.FALSE, one, one);
687:
688: if (log.isDebugEnabled()) {
689: log.debug("newReferer=" + ref.getRefererUrl());
690: }
691:
692: String refurl = ref.getRefererUrl();
693:
694: // If not a direct or search engine then search for linkback
695: boolean doLinkbackExtraction = RollerRuntimeConfig
696: .getBooleanProperty("site.linkbacks.enabled");
697: if (doLinkbackExtraction
698: && entry != null
699: && !refurl.equals("direct")
700: && !refurl.startsWith("http://google")
701: && !refurl.startsWith("http://www.google")
702: && !refurl.startsWith("http://search.netscape")
703: && !refurl.startsWith("http://www.blinkpro")
704: && !refurl.startsWith("http://search.msn")
705: && !refurl.startsWith("http://search.yahoo")
706: && !refurl.startsWith("http://uk.search.yahoo")
707: && !refurl
708: .startsWith("http://www.javablogs.com")
709: && !refurl.startsWith("http://www.teoma")) {
710: // Launch thread to extract referer linkback
711:
712: try {
713: Roller mRoller = RollerFactory.getRoller();
714: mRoller.getThreadManager().executeInBackground(
715: new LinkbackExtractorRunnable(ref));
716: } catch (InterruptedException e) {
717: log
718: .warn(
719: "Interrupted during linkback extraction",
720: e);
721: }
722: } else {
723: saveReferer(ref);
724: }
725: }
726: } catch (RollerException pe) {
727: log.error(pe);
728: } catch (NullPointerException npe) {
729: log.error(npe);
730: }
731: }
732:
733: /**
734: * Use LinkbackExtractor to parse title and excerpt from referer
735: */
736: class LinkbackExtractorRunnable implements Runnable {
737:
738: private RefererData mReferer = null;
739:
740: public LinkbackExtractorRunnable(RefererData referer) {
741: mReferer = referer;
742: }
743:
744: public void run() {
745:
746: try {
747: LinkbackExtractor lb = new LinkbackExtractor(mReferer
748: .getRefererUrl(), mReferer.getRequestUrl());
749:
750: if (lb.getTitle() != null && lb.getExcerpt() != null) {
751: mReferer.setTitle(lb.getTitle());
752: mReferer.setExcerpt(lb.getExcerpt());
753:
754: if (lb.getPermalink() != null) {
755: // The presence of a permalink indicates that this
756: // linkback was parsed out of an RSS feed and is
757: // presumed to be a good linkback.
758:
759: mReferer.setRefererPermalink(lb.getPermalink());
760:
761: // See if this request/permalink is in the DB
762: List matchRef = getExistingReferers(mReferer
763: .getWebsite(),
764: mReferer.getDateString(), mReferer
765: .getRefererPermalink());
766:
767: // If it is the first, then set it to be visible
768: if (matchRef.size() == 0) {
769: mReferer.setVisible(Boolean.TRUE);
770: } else {
771: // We can't throw away duplicates or we will
772: // end up reparsing them everytime a hit comes
773: // in from one of them, but we can mark them
774: // as duplicates.
775: mReferer.setDuplicate(Boolean.TRUE);
776: }
777:
778: saveReferer(mReferer);
779:
780: }
781:
782: else {
783: // Store the new referer
784: saveReferer(mReferer);
785:
786: // Hacky Referer URL weighting kludge:
787: //
788: // If there are multple referers to a request URL,
789: // then we want to pick the best one. The others
790: // are marked as duplicates. To do this we use a
791: // weight. The weight formula is:
792: //
793: // w = URL length + (100 if URL contains anchor)
794:
795: // LOOP: find the referer with the highest weight
796: Boolean visible = Boolean.FALSE;
797: List refs = getReferersWithSameTitle(mReferer
798: .getWebsite(),
799: mReferer.getRequestUrl(),
800: lb.getTitle(), lb.getExcerpt());
801: RefererData chosen = null;
802: int maxweight = 0;
803: for (Iterator rdItr = refs.iterator(); rdItr
804: .hasNext();) {
805: RefererData referer = (RefererData) rdItr
806: .next();
807:
808: int weight = referer.getRefererUrl()
809: .length();
810: if (referer.getRefererUrl().indexOf('#') != -1) {
811: weight += 100;
812: }
813:
814: if (weight > maxweight) {
815: chosen = referer;
816: maxweight = weight;
817: }
818:
819: if (referer.getVisible().booleanValue()) {
820: // If any are visible then chosen
821: // replacement must be visible as well.
822: visible = Boolean.TRUE;
823: }
824:
825: }
826:
827: // LOOP: to mark all of the lower weight ones
828: // as duplicates
829: for (Iterator rdItr = refs.iterator(); rdItr
830: .hasNext();) {
831: RefererData referer = (RefererData) rdItr
832: .next();
833:
834: if (referer != chosen) {
835: referer.setDuplicate(Boolean.TRUE);
836: } else {
837: referer.setDuplicate(Boolean.FALSE);
838: referer.setVisible(visible);
839: }
840: saveReferer(referer);
841: }
842:
843: }
844: } else {
845: // It is not a linkback, but store it anyway
846: saveReferer(mReferer);
847:
848: log.info("No excerpt found at refering URL "
849: + mReferer.getRefererUrl());
850: }
851: } catch (Exception e) {
852: log.error("Processing linkback", e);
853: } finally {
854: strategy.release();
855: }
856:
857: }
858:
859: }
860:
861: public void release() {
862: }
863: }
|