001: // yacyNewsActions.java
002: // -----------------------
003: // part of YaCy
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2005
007: //
008: // $LastChangedDate: 2008-01-24 22:49:00 +0000 (Do, 24 Jan 2008) $
009: // $LastChangedRevision: 4397 $
010: // $LastChangedBy: orbiter $
011: //
012: // This program is free software; you can redistribute it and/or modify
013: // it under the terms of the GNU General Public License as published by
014: // the Free Software Foundation; either version 2 of the License, or
015: // (at your option) any later version.
016: //
017: // This program is distributed in the hope that it will be useful,
018: // but WITHOUT ANY WARRANTY; without even the implied warranty of
019: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
020: // GNU General Public License for more details.
021: //
022: // You should have received a copy of the GNU General Public License
023: // along with this program; if not, write to the Free Software
024: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
025: //
026: // Using this software in any meaning (reading, learning, copying, compiling,
027: // running) means that you agree that the Author(s) is (are) not responsible
028: // for cost, loss of data or any harm that may be caused directly or indirectly
029: // by usage of this softare or this documentation. The usage of this software
030: // is on your own risk. The installation and usage (starting/running) of this
031: // software may allow other people or application to access your computer and
032: // any attached devices and is highly dependent on the configuration of the
033: // software which must be done by the user of the software; the author(s) is
034: // (are) also not responsible for proper configuration and usage of the
035: // software, even if provoked by documentation provided together with
036: // the software.
037: //
038: // Any changes to this file according to the GPL as documented in the file
039: // gpl.txt aside this file in the shipment you received can be done to the
040: // lines that follows this copyright notice here, but changes must not be
041: // done inside the copyright notice above. A re-distribution must contain
042: // the intact and unchanged copyright notice.
043: // Contributions and changes to the program code must be marked as such.
044:
045: package de.anomic.yacy;
046:
047: import java.io.File;
048: import java.io.IOException;
049: import java.util.HashSet;
050: import java.util.Iterator;
051: import java.util.Map;
052:
053: import de.anomic.plasma.plasmaSwitchboard;
054: import de.anomic.plasma.urlPattern.plasmaURLPattern;
055:
056: public class yacyNewsPool {
057:
/** Queue selector for the incoming news queue (resolved by {@code switchQueue}). */
public static final int INCOMING_DB = 0;
/** Queue selector for the queue of already processed news. */
public static final int PROCESSED_DB = 1;
/** Queue selector for the outgoing news queue (own news still being distributed). */
public static final int OUTGOING_DB = 2;
/** Queue selector for the queue of published news (kept for history). */
public static final int PUBLISHED_DB = 3;
062:
/* ========================================================================
 * CATEGORIES for YACY NEWS
 *
 * Every group starts with a short group constant (e.g. CATEGORY_PROFILE)
 * whose value is the common prefix of the eight-character category ids
 * declared after it. Only the eight-character ids are listed in the
 * array of valid categories.
 * ======================================================================== */
/* ------------------------------------------------------------------------
 * PROFILE related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_PROFILE = "prfl";
/**
 * a profile entry was updated (implemented)
 */
public static final String CATEGORY_PROFILE_UPDATE = "prfleupd";
/**
 * a peer starts up and renews its profile broadcast; used to implement supporter page
 */
public static final String CATEGORY_PROFILE_BROADCAST = "prflecst";
/**
 * a peer has done something good (i.e. served good search results)
 * and gets a positive vote so it can rise on the supporter page
 */
public static final String CATEGORY_PROFILE_VOTE_GOOD = "prflegvt";
/**
 * a peer has done something bad (i.e. spammed) and gets a negative vote
 */
public static final String CATEGORY_PROFILE_VOTE_BAD = "prflebvt";

/* ------------------------------------------------------------------------
 * CRAWLING related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_CRAWL = "crwl";
/**
 * a crawl with remote indexing was started
 */
public static final String CATEGORY_CRAWL_START = "crwlstrt";
/**
 * a crawl with remote indexing was stopped
 */
public static final String CATEGORY_CRAWL_STOP = "crwlstop";
/**
 * a comment on a crawl with remote indexing
 */
public static final String CATEGORY_CRAWL_COMMENT = "crwlcomm";

/* ------------------------------------------------------------------------
 * BLACKLIST related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_BLACKLIST = "blckl";
/**
 * a public blacklist entry was added
 */
public static final String CATEGORY_BLACKLIST_ADD = "blckladd";
/**
 * a vote and comment on a public blacklist add
 */
public static final String CATEGORY_BLACKLIST_VOTE_ADD = "blcklavt";
/**
 * a public blacklist entry was deleted
 */
public static final String CATEGORY_BLACKLIST_DELETE = "blckldel";
/**
 * a vote and comment on a public blacklist delete
 */
public static final String CATEGORY_BLACKLIST_VOTE_DEL = "blckldvt";

/* ------------------------------------------------------------------------
 * FILE-SHARE related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_FILESHARE = "flshr";
/**
 * a file was added to the file share
 */
public static final String CATEGORY_FILESHARE_ADD = "flshradd";
/**
 * a file was deleted from the file share
 */
public static final String CATEGORY_FILESHARE_DEL = "flshrdel";
/**
 * a comment to a file share entry
 */
public static final String CATEGORY_FILESHARE_COMMENT = "flshrcom";

/* ------------------------------------------------------------------------
 * BOOKMARK related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_BOOKMARK = "bkmrk";
/**
 * a bookmark was added/created
 */
public static final String CATEGORY_BOOKMARK_ADD = "bkmrkadd";
/**
 * a vote and comment on a bookmark add
 */
public static final String CATEGORY_BOOKMARK_VOTE_ADD = "bkmrkavt";
/**
 * a bookmark was moved
 */
public static final String CATEGORY_BOOKMARK_MOVE = "bkmrkmov";
/**
 * a vote and comment on a bookmark move
 */
public static final String CATEGORY_BOOKMARK_VOTE_MOVE = "bkmrkmvt";
/**
 * a bookmark was deleted
 */
public static final String CATEGORY_BOOKMARK_DEL = "bkmrkdel";
/**
 * a vote and comment on a bookmark delete
 */
public static final String CATEGORY_BOOKMARK_VOTE_DEL = "bkmrkdvt";

/* ------------------------------------------------------------------------
 * SURFTIPP related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_SURFTIPP = "stipp";
/**
 * a surf tipp was added
 */
public static final String CATEGORY_SURFTIPP_ADD = "stippadd";
/**
 * a vote and comment on a surf tipp
 */
public static final String CATEGORY_SURFTIPP_VOTE_ADD = "stippavt";

/* ------------------------------------------------------------------------
 * WIKI related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_WIKI = "wiki";
/**
 * a wiki page was updated
 */
public static final String CATEGORY_WIKI_UPDATE = "wiki_upd";
/**
 * a wiki page was deleted
 */
public static final String CATEGORY_WIKI_DEL = "wiki_del";

/* ------------------------------------------------------------------------
 * BLOG related CATEGORIES
 * ------------------------------------------------------------------------ */
public static final String CATEGORY_BLOG = "blog";
/**
 * a blog entry was added
 */
public static final String CATEGORY_BLOG_ADD = "blog_add";
/**
 * a blog entry was deleted
 */
public static final String CATEGORY_BLOG_DEL = "blog_del";
210:
/* ========================================================================
 * ARRAY of valid CATEGORIES
 * ======================================================================== */
/** All category ids that are accepted for news records, grouped by topic. */
public static final String[] category = {
    // PROFILE related CATEGORIES
    CATEGORY_PROFILE_UPDATE, CATEGORY_PROFILE_BROADCAST,
    CATEGORY_PROFILE_VOTE_GOOD, CATEGORY_PROFILE_VOTE_BAD,

    // CRAWLING related CATEGORIES
    CATEGORY_CRAWL_START, CATEGORY_CRAWL_STOP, CATEGORY_CRAWL_COMMENT,

    // BLACKLIST related CATEGORIES
    CATEGORY_BLACKLIST_ADD, CATEGORY_BLACKLIST_VOTE_ADD,
    CATEGORY_BLACKLIST_DELETE, CATEGORY_BLACKLIST_VOTE_DEL,

    // FILESHARE related CATEGORIES
    CATEGORY_FILESHARE_ADD, CATEGORY_FILESHARE_DEL,
    CATEGORY_FILESHARE_COMMENT,

    // BOOKMARK related CATEGORIES
    CATEGORY_BOOKMARK_ADD, CATEGORY_BOOKMARK_VOTE_ADD,
    CATEGORY_BOOKMARK_MOVE, CATEGORY_BOOKMARK_VOTE_MOVE,
    CATEGORY_BOOKMARK_DEL, CATEGORY_BOOKMARK_VOTE_DEL,

    // SURFTIPP related CATEGORIES
    CATEGORY_SURFTIPP_ADD, CATEGORY_SURFTIPP_VOTE_ADD,

    // WIKI related CATEGORIES
    CATEGORY_WIKI_UPDATE, CATEGORY_WIKI_DEL,

    // BLOG related CATEGORIES
    CATEGORY_BLOG_ADD, CATEGORY_BLOG_DEL
};

/** Same ids as {@link #category}, as a set for membership tests. */
public static HashSet<String> categories = new HashSet<String>();
static {
    // copy every valid id from the array into the lookup set
    for (String validCategory : category) {
        categories.add(validCategory);
    }
}
256:
257: private yacyNewsDB newsDB;
258: private yacyNewsQueue outgoingNews, publishedNews, incomingNews,
259: processedNews;
260: private int maxDistribution;
261:
262: public yacyNewsPool(File yacyDBPath) {
263: newsDB = new yacyNewsDB(new File(yacyDBPath, "news2.db"));
264: outgoingNews = new yacyNewsQueue(new File(yacyDBPath,
265: "newsOut1.stack"), newsDB);
266: publishedNews = new yacyNewsQueue(new File(yacyDBPath,
267: "newsPublished1.stack"), newsDB);
268: incomingNews = new yacyNewsQueue(new File(yacyDBPath,
269: "newsIn1.stack"), newsDB);
270: processedNews = new yacyNewsQueue(new File(yacyDBPath,
271: "newsProcessed1.stack"), newsDB);
272: maxDistribution = 30;
273: }
274:
275: public synchronized void close() {
276: newsDB.close();
277: outgoingNews.close();
278: publishedNews.close();
279: incomingNews.close();
280: processedNews.close();
281: }
282:
283: public int dbSize() {
284: return newsDB.size();
285: }
286:
287: public Iterator<yacyNewsRecord> recordIterator(int dbKey, boolean up) {
288: // returns an iterator of yacyNewsRecord-type objects
289: yacyNewsQueue queue = switchQueue(dbKey);
290: return queue.records(up);
291: }
292:
293: public void publishMyNews(yacyNewsRecord record) {
294: // this shall be called if our peer generated a new news record and wants to publish it
295: if (record == null)
296: return;
297: try {
298: if (newsDB.get(record.id()) == null) {
299: incomingNews.push(record); // we want to see our own news..
300: outgoingNews.push(record); // .. and put it on the publishing list
301: }
302: } catch (IOException e) {
303: }
304: }
305:
306: public yacyNewsRecord myPublication() throws IOException {
307: // generate a record for next peer-ping
308: if (outgoingNews.size() == 0)
309: return null;
310: yacyNewsRecord record = outgoingNews.topInc();
311: if ((record != null)
312: && (record.distributed() >= maxDistribution)) {
313: // move record to its final position. This is only for history
314: publishedNews.push(outgoingNews.pop());
315: }
316: return record;
317: }
318:
319: public void enqueueIncomingNews(yacyNewsRecord record)
320: throws IOException {
321: // called if a news is attached to a seed
322:
323: // check consistency
324: if (record.id() == null)
325: return;
326: if (record.id().length() != yacyNewsRecord.idLength)
327: return;
328: if (record.category() == null)
329: return;
330: if (!(categories.contains(record.category())))
331: return;
332: if (record.created().getTime() == 0)
333: return;
334: Map<String, String> attributes = record.attributes();
335: if (attributes.containsKey("url")) {
336: if (plasmaSwitchboard.urlBlacklist.isListed(
337: plasmaURLPattern.BLACKLIST_NEWS, new yacyURL(
338: (String) attributes.get("url"), null))) {
339: System.out
340: .println("DEBUG: ignored news-entry url blacklisted: "
341: + attributes.get("url"));
342: return;
343: }
344: }
345: if (attributes.containsKey("startURL")) {
346: if (plasmaSwitchboard.urlBlacklist.isListed(
347: plasmaURLPattern.BLACKLIST_NEWS, new yacyURL(
348: (String) attributes.get("startURL"), null))) {
349: System.out
350: .println("DEBUG: ignored news-entry url blacklisted: "
351: + attributes.get("startURL"));
352: return;
353: }
354: }
355:
356: // double-check with old news
357: if (newsDB.get(record.id()) != null)
358: return;
359: incomingNews.push(record);
360: }
361:
362: public int size(int dbKey) {
363: return switchQueue(dbKey).size();
364: }
365:
366: public int automaticProcess() throws IOException,
367: InterruptedException {
368: // processes news in the incoming-db
369: // returns number of processes
370: yacyNewsRecord record;
371: int pc = 0;
372: synchronized (this .incomingNews) {
373: Iterator<yacyNewsRecord> i = incomingNews.records(true);
374: while (i.hasNext()) {
375: // check for interruption
376: if (Thread.currentThread().isInterrupted())
377: throw new InterruptedException(
378: "Shutdown in progress");
379:
380: // get next news record
381: record = (yacyNewsRecord) i.next();
382: if (automaticProcessP(record)) {
383: this .processedNews.push(record);
384: i.remove();
385: pc++;
386: }
387: }
388: }
389: return pc;
390: }
391:
392: long day = 1000 * 60 * 60 * 24;
393:
394: private boolean automaticProcessP(yacyNewsRecord record) {
395: if (record == null)
396: return false;
397: if (record.category() == null)
398: return true;
399: if ((System.currentTimeMillis() - record.created().getTime()) > (14 * day)) {
400: // remove everything after 1 week
401: return true;
402: }
403: if ((record.category().equals(CATEGORY_WIKI_UPDATE))
404: && ((System.currentTimeMillis() - record.created()
405: .getTime()) > (3 * day))) {
406: return true;
407: }
408: if ((record.category().equals(CATEGORY_BLOG_ADD))
409: && ((System.currentTimeMillis() - record.created()
410: .getTime()) > (3 * day))) {
411: return true;
412: }
413: if ((record.category().equals(CATEGORY_PROFILE_UPDATE))
414: && ((System.currentTimeMillis() - record.created()
415: .getTime()) > (7 * day))) {
416: return true;
417: }
418: if ((record.category().equals(CATEGORY_CRAWL_START))
419: && ((System.currentTimeMillis() - record.created()
420: .getTime()) > (2 * day))) {
421: yacySeed seed = yacyCore.seedDB.get(record.originator());
422: if (seed == null)
423: return false;
424: try {
425: return (Integer
426: .parseInt(seed.get(yacySeed.ISPEED, "-")) < 10);
427: } catch (NumberFormatException ee) {
428: return true;
429: }
430: }
431: return false;
432: }
433:
434: public synchronized yacyNewsRecord getSpecific(int dbKey,
435: String category, String key, String value) {
436: yacyNewsQueue queue = switchQueue(dbKey);
437: yacyNewsRecord record;
438: String s;
439: Iterator<yacyNewsRecord> i = queue.records(true);
440: while (i.hasNext()) {
441: record = i.next();
442: if ((record != null)
443: && (record.category().equals(category))) {
444: s = (String) record.attributes().get(key);
445: if ((s != null) && (s.equals(value)))
446: return record;
447: }
448: }
449: return null;
450: }
451:
452: public synchronized yacyNewsRecord getByOriginator(int dbKey,
453: String category, String originatorHash) {
454: yacyNewsQueue queue = switchQueue(dbKey);
455: yacyNewsRecord record;
456: Iterator<yacyNewsRecord> i = queue.records(true);
457: while (i.hasNext()) {
458: record = i.next();
459: if ((record != null)
460: && (record.category().equals(category))
461: && (record.originator().equals(originatorHash))) {
462: return record;
463: }
464: }
465: return null;
466: }
467:
468: public synchronized yacyNewsRecord getByID(int dbKey, String id) {
469: switch (dbKey) {
470: case INCOMING_DB:
471: return incomingNews.get(id);
472: case PROCESSED_DB:
473: return processedNews.get(id);
474: case OUTGOING_DB:
475: return outgoingNews.get(id);
476: case PUBLISHED_DB:
477: return publishedNews.get(id);
478: }
479: return null;
480: }
481:
482: private yacyNewsQueue switchQueue(int dbKey) {
483: switch (dbKey) {
484: case INCOMING_DB:
485: return incomingNews;
486: case PROCESSED_DB:
487: return processedNews;
488: case OUTGOING_DB:
489: return outgoingNews;
490: case PUBLISHED_DB:
491: return publishedNews;
492: }
493: return null;
494: }
495:
496: public void clear(int dbKey) {
497: // clear a table
498: switch (dbKey) {
499: case INCOMING_DB:
500: incomingNews.clear();
501: break;
502: case PROCESSED_DB:
503: processedNews.clear();
504: break;
505: case OUTGOING_DB:
506: outgoingNews.clear();
507: break;
508: case PUBLISHED_DB:
509: publishedNews.clear();
510: break;
511: }
512: }
513:
514: public void moveOff(int dbKey, String id) throws IOException {
515: // this is called if a queue element shall be moved to another queue or off the queue
516: // it depends on the dbKey how the record is handled
517: switch (dbKey) {
518: case INCOMING_DB:
519: moveOff(incomingNews, processedNews, id);
520: break;
521: case PROCESSED_DB:
522: moveOff(processedNews, null, id);
523: break;
524: case OUTGOING_DB:
525: moveOff(outgoingNews, publishedNews, id);
526: break;
527: case PUBLISHED_DB:
528: moveOff(publishedNews, null, id);
529: break;
530: }
531: }
532:
533: private boolean moveOff(yacyNewsQueue fromqueue,
534: yacyNewsQueue toqueue, String id) throws IOException {
535: // called if a published news shall be removed
536: yacyNewsRecord record = fromqueue.remove(id);
537: if (record == null) {
538: return false;
539: }
540: if (toqueue != null) {
541: toqueue.push(record);
542: } else if ((incomingNews.get(id) == null)
543: && (processedNews.get(id) == null)
544: && (outgoingNews.get(id) == null)
545: && (publishedNews.get(id) == null)) {
546: newsDB.remove(id);
547: }
548: return true;
549: }
550:
551: public void moveOffAll(int dbKey) throws IOException {
552: // this is called if a queue element shall be moved to another queue or off the queue
553: // it depends on the dbKey how the record is handled
554: switch (dbKey) {
555: case INCOMING_DB:
556: moveOffAll(incomingNews, processedNews);
557: break;
558: case PROCESSED_DB:
559: processedNews.clear();
560: break;
561: case OUTGOING_DB:
562: moveOffAll(outgoingNews, publishedNews);
563: break;
564: case PUBLISHED_DB:
565: publishedNews.clear();
566: break;
567: }
568: }
569:
570: private int moveOffAll(yacyNewsQueue fromqueue,
571: yacyNewsQueue toqueue) throws IOException {
572: // move off all news from a specific queue to another queue
573: Iterator<yacyNewsRecord> i = fromqueue.records(true);
574: yacyNewsRecord record;
575: if (toqueue == null)
576: return 0;
577: int c = 0;
578: while (i.hasNext()) {
579: record = (yacyNewsRecord) i.next();
580: if (record == null)
581: continue;
582: toqueue.push(record);
583: c++;
584: }
585: fromqueue.clear();
586: return c;
587: }
588:
589: }
|