001: //LogParserPLASMA.java
002: //-------------------------------------
003: //part of YACY
004: //(C) by Michael Peter Christen; mc@anomic.de
005: //first published on http://www.anomic.de
006: //Frankfurt, Germany, 2004
007: //
008: //This file ist contributed by Matthias Soehnholz
009: //last major change: $LastChangedDate: 2008-01-23 23:08:32 +0000 (Mi, 23 Jan 2008) $ by $LastChangedBy: orbiter $
010: //Revision: $LastChangedRevision: 4382 $
011: //
012: //This program is free software; you can redistribute it and/or modify
013: //it under the terms of the GNU General Public License as published by
014: //the Free Software Foundation; either version 2 of the License, or
015: //(at your option) any later version.
016: //
017: //This program is distributed in the hope that it will be useful,
018: //but WITHOUT ANY WARRANTY; without even the implied warranty of
019: //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
020: //GNU General Public License for more details.
021: //
022: //You should have received a copy of the GNU General Public License
023: //along with this program; if not, write to the Free Software
024: //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
025: //
026: //Using this software in any meaning (reading, learning, copying, compiling,
027: //running) means that you agree that the Author(s) is (are) not responsible
028: //for cost, loss of data or any harm that may be caused directly or indirectly
029: //by usage of this softare or this documentation. The usage of this software
030: //is on your own risk. The installation and usage (starting/running) of this
031: //software may allow other people or application to access your computer and
032: //any attached devices and is highly dependent on the configuration of the
033: //software which must be done by the user of the software; the author(s) is
034: //(are) also not responsible for proper configuration and usage of the
035: //software, even if provoked by documentation provided together with
036: //the software.
037: //
038: //Any changes to this file according to the GPL as documented in the file
039: //gpl.txt aside this file in the shipment you received can be done to the
040: //lines that follows this copyright notice here, but changes must not be
041: //done inside the copyright notive above. A re-distribution must contain
042: //the intact and unchanged copyright notice.
043: //Contributions and changes to the program code must be marked as such.
044:
045: package de.anomic.server.logging.logParsers;
046:
047: import java.util.HashSet;
048: import java.util.Hashtable;
049: import java.util.regex.Matcher;
050: import java.util.regex.Pattern;
051:
052: public class LogParserPLASMA implements LogParser {
053:
054: /** the version of the LogParser - <strong>Double</strong>*/
055: public static final String PARSER_VERSION = "version";
056:
057: /** the amount of URLs received during DHT - <strong>Integer</strong> */
058: public static final String URLS_RECEIVED = "urlSum";
059:
060: /** the amount of URLs requested during DHT - <strong>Integer</strong> */
061: public static final String URLS_REQUESTED = "urlReqSum";
062:
063: /** the amount of URLs blocked during DHT because they match the peer's blacklist - <strong>Integer</strong> */
064: public static final String URLS_BLOCKED = "blockedURLSum";
065:
066: /** the amount of words received during DHT - <strong>Integer</strong> */
067: public static final String WORDS_RECEIVED = "wordsSum";
068:
069: /** the amount of RWIs received during DHT - <strong>Integer</strong> */
070: public static final String RWIS_RECEIVED = "rwiSum";
071:
072: /** the amount of RWIs blocked during DHT because their entries match the peer's blacklist - <strong>Integer</strong> */
073: public static final String RWIS_BLOCKED = "blockedRWISum";
074:
075: /** total time receiving RWIs during DHT in milli seconds - <strong>Long</strong> */
076: public static final String RWIS_RECEIVED_TIME = "rwiTimeSum";
077:
078: /** total time receiving URLs during DHT in milli seconds - <strong>Long</strong> */
079: public static final String URLS_RECEIVED_TIME = "urlTimeSum";
080:
081: /** the traffic sent during DHT in bytes - <strong>Long</strong> */
082: public static final String DHT_TRAFFIC_SENT = "DHTSendTraffic";
083:
084: /** the amount of URLs requested by other peers and sent by this one - <strong>Integer</strong> */
085: public static final String DHT_URLS_SENT = "DHTSendURLs";
086:
087: /** the amount of rejected DHT transfers from other peers (i.e. because this peer was busy) - <strong>Integer</strong> */
088: public static final String DHT_REJECTED = "RWIRejectCount";
089:
090: /** the peer-names from whose DHT transfers were rejected - <strong>HasSet</strong> */
091: public static final String DHT_REJECTED_PEERS_NAME = "DHTRejectPeerNames";
092:
093: /** the peer-hashes from whose DHT transfers were rejected - <strong>HasSet</strong> */
094: public static final String DHT_REJECTED_PEERS_HASH = "DHTRejectPeerHashs";
095:
096: /** the peer-names this peer sent DHT chunks to - <strong>HasSet</strong> */
097: public static final String DHT_SENT_PEERS_NAME = "DHTPeerNames";
098:
099: /** the peer-hashes this peer sent DHT chunks to - <strong>HasSet</strong> */
100: public static final String DHT_SENT_PEERS_HASH = "DHTPeerHashs";
101:
102: /** total amount of selected peers for index distribution - <strong>Integer</strong> */
103: public static final String DHT_SELECTED = "DHTSelectionTargetCount";
104:
105: /** total amount of words selected for index distribution - <strong>Integer</strong> */
106: public static final String DHT_WORDS_SELECTED = "DHTSelectionWordsCount";
107:
108: /** total time selecting words for index distribution - <strong>Integer</strong> */
109: public static final String DHT_WORDS_SELECTED_TIME = "DHTSelectionWordsTimeCount";
110:
111: /** the minimal DHT distance during peer-selection for index distribution - <strong>Double</strong> */
112: public static final String DHT_DISTANCE_MIN = "minDHTDist";
113:
114: /** the maximal DHT distance during peer-selection for index distribution - <strong>Double</strong> */
115: public static final String DHT_DISTANCE_MAX = "maxDHTDist";
116:
117: /** the average DHT distance during peer-selection for index distribution - <strong>Double</strong> */
118: public static final String DHT_DISTANCE_AVERAGE = "avgDHTDist";
119:
120: /** how many times remote peers were too busy to accept the index transfer - <strong>Integer</strong> */
121: public static final String PEERS_BUSY = "busyPeerCount";
122:
123: /** how many times not enough peers for index distribution were found - <strong>Integer</strong> */
124: public static final String PEERS_TOO_LESS = "notEnoughDHTPeers";
125:
126: /** how many times the index distribution failed (i.e. due to time-out or other reasons) - <strong>Integer</strong> */
127: public static final String DHT_SENT_FAILED = "failedIndexDistributionCount";
128:
129: /** how many times the error "<code>tried to create left child-node twice</code>" occured - <strong>Integer</strong> */
130: public static final String ERROR_CHILD_TWICE_LEFT = "leftChildTwiceCount";
131:
132: /** how many times the error "<code>tried to create right child-node twice</code>" occured - <strong>Integer</strong> */
133: public static final String ERROR_CHILD_TWICE_RIGHT = "rightChildTwiceCount";
134:
135: /** how many ranking distributions were executed successfully - <strong>Integer</strong> */
136: public static final String RANKING_DIST = "rankingDistributionCount";
137:
138: /** total time the ranking distributions took - <strong>Integer</strong> */
139: public static final String RANKING_DIST_TIME = "rankingDistributionTime";
140:
141: /** how many ranking distributions failed - <strong>Integer</strong> */
142: public static final String RANKING_DIST_FAILED = "rankingDistributionFailCount";
143:
144: /** how many times the error "<code>Malformed URL</code>" occured - <strong>Integer</strong> */
145: public static final String ERROR_MALFORMED_URL = "malformedURLCount";
146:
147: /** the amount of indexed sites - <strong>Integer</strong> */
148: public static final String INDEXED_SITES = "indexedSites";
149:
150: /** total amount of indexed words - <strong>Integer</strong> */
151: public static final String INDEXED_WORDS = "indexedWords";
152:
153: /** total size of all indexed sites - <strong>Integer</strong> */
154: public static final String INDEXED_SITES_SIZE = "indexedSiteSizeSum";
155:
156: /** total amount of indexed anchors - <strong>Integer</strong> */
157: public static final String INDEXED_ANCHORS = "indexedAnchors";
158:
159: /** total time needed for stacking the site of an indexing - <strong>Integer</strong> */
160: public static final String INDEXED_STACK_TIME = "indexedStackingTime";
161:
162: /** total time needed for parsing during indexing - <strong>Integer</strong> */
163: public static final String INDEXED_PARSE_TIME = "indexedParsingTime";
164:
165: /** total time needed for the actual indexing during indexing - <strong>Integer</strong> */
166: public static final String INDEXED_INDEX_TIME = "indexedIndexingTime";
167:
168: /** total time needed for storing the results of an indexing - <strong>Integer</strong> */
169: public static final String INDEXED_STORE_TIME = "indexedStorageTime";
170:
171: /** accumulated time needed to parse the log entries up to now (in ms)*/
172: public static final String TOTAL_PARSER_TIME = "totalParserTime";
173:
174: /** times the parser was called, respectively amount of independant log-lines */
175: public static final String TOTAL_PARSER_RUNS = "totalParserRuns";
176:
177: private final double parserVersion = 0.1;
178: private final String parserType = "PLASMA";
179:
180: //RegExp for LogLevel I
181: private static final Pattern i1 = Pattern
182: .compile("Received (\\d*) URLs from peer [\\w-_]{12}:[\\w-_]*/[\\w.-]* in (\\d*) ms, Blocked (\\d*) URLs");
183: private static final Pattern i2 = Pattern
184: .compile("Received (\\d*) Entries (\\d*) Words \\[[\\w-_]{12} .. [\\w-_]{12}\\]/[\\w.-]* from [\\w-_]{12}:[\\w-_]*/[\\w.-]*, processed in (\\d*) milliseconds, requesting (\\d*)/(\\d*) URLs, blocked (\\d*) RWIs");
185: private static final Pattern i2_2 = Pattern
186: .compile("Received (\\d*) Entries (\\d*) Words \\[[\\w-_]{12} .. [\\w-_]{12}\\]/[\\w.-]* from [\\w-_]{12}:[\\w-_]*, processed in (\\d*) milliseconds, requesting (\\d*)/(\\d*) URLs, blocked (\\d*) RWIs");
187: private static final Pattern i3 = Pattern
188: .compile("Index transfer of (\\d*) words \\[[\\w-_]{12} .. [\\w-_]{12}\\] to peer ([\\w-_]*):([\\w-_]{12}) in (\\d*) seconds successful \\((\\d*) words/s, (\\d*) Bytes\\)");
189: private static final Pattern i4 = Pattern
190: .compile("Index transfer of (\\d*) entries (\\d*) words \\[[\\w-_]{12} .. [\\w-_]{12}\\] and (\\d*) URLs to peer ([\\w-_]*):([\\w-_]{12}) in (\\d*) seconds successful \\((\\d*) words/s, (\\d*) Bytes\\)");
191: private static final Pattern i5 = Pattern
192: .compile("Selected \\w* DHT target peer ([\\w-_]*):([\\w-_]{12}), distance = ([\\w.-]*)");
193: private static final Pattern i6 = Pattern
194: .compile("Rejecting RWIs from peer ([\\w-_]{12}):([\\w-_]*)/([\\w.]*) ([\\w. ]*)");
195: private static final Pattern i7 = Pattern
196: .compile("DHT distribution: transfer to peer [\\w-]* finished.");
197: private static final Pattern i8 = Pattern
198: .compile("Index selection of (\\d*) words \\[[\\w-_]{12} .. [\\w-_]{12}\\] in (\\d*) seconds");
199: private static final Pattern i9 = Pattern
200: .compile("RankingDistribution - transmitted file [\\w-:.\\\\]* to [\\w.]*:\\d* successfully in (\\d)* seconds");
201: private static final Pattern i10 = Pattern
202: .compile("RankingDistribution - error transmitting file");
203: private static final Pattern i11 = Pattern
204: .compile("Peer [\\w-_]*:[\\w-_]{12} is busy\\. Waiting \\d* ms\\.");
205: //private static Pattern i12 = Pattern.compile("\\*Indexed \\d* words in URL [\\w:.&/%-~$\u00A7@=]* \\[[\\w-_]{12}\\]");
206: private static final Pattern i13 = Pattern
207: .compile("WROTE HEADER for |LOCALCRAWL\\[\\d*, \\d*, \\d*, \\d*\\]|REJECTED WRONG STATUS TYPE");
208: //RegExp for LogLevel W
209: private static final Pattern w1 = Pattern
210: .compile("found not enough \\(\\d*\\) peers for distribution");
211: private static final Pattern w2 = Pattern
212: .compile("Transfer to peer ([\\w-_]*):([\\w-_]{12}) failed:'(\\w*)'");
213: //RegExp for LogLevel E
214: private static final Pattern e1 = Pattern
215: .compile("INTERNAL ERROR AT plasmaCrawlLURL:store:de.anomic.kelondro.kelondroException: tried to create (\\w*) node twice in db");
216: private static final Pattern e2 = Pattern
217: .compile("INTERNAL ERROR [\\w./: ]* java.net.MalformedURLException");
218:
219: private Matcher m;
220: //RegExp for advancedParser
221: //private Pattern adv1 = Pattern.compile("\\*Indexed (\\d*) words in URL [\\w:.&?/%-=]* \\[[\\w-_]{12}\\]\\n\\tDescription: ([\\w- ]*)\\n\\tMimeType: ([\\w-_/]*) \\| Size: (\\d*) bytes \\| Anchors: (\\d*)\\n\\tStackingTime: (\\d*) ms \\| ParsingTime: (\\d*) ms \\| IndexingTime: (\\d*) ms \\| StorageTime: (\\d*) ms");
222: private static Pattern adv1 = Pattern
223: .compile("\\*Indexed (\\d+) words in URL [\\w:.&/%-~;$\u00A7@=]* \\[[\\w_-]{12}\\]\\r?\\n?"
224: + "\\tDescription: +([\\w-\\.,:!='\"|/+@\\(\\) \\t]*)\\r?\\n?"
225: + "\\tMimeType: ([\\w_~/-]*) \\| Charset: ([\\w-]*) \\| Size: (\\d+) bytes \\| Anchors: (\\d+)\\r?\\n?"
226: + "\\tStackingTime:[ ]*(\\d+) ms \\| ParsingTime:[ ]*(\\d+) ms \\| IndexingTime: (\\d+) ms \\| StorageTime: (\\d+) ms");
227:
228: private int urlSum = 0;
229: private int urlReqSum = 0;
230: private int blockedURLSum = 0;
231: private int wordsSum = 0;
232: private int rwiSum = 0;
233: private int blockedRWISum = 0;
234: private long urlTimeSum = 0;
235: private long rwiTimeSum = 0;
236: private long DHTSendTraffic = 0;
237: private int DHTSendURLs = 0;
238: private int RWIRejectCount = 0;
239: private HashSet<String> RWIRejectPeerNames = new HashSet<String>();
240: private HashSet<String> RWIRejectPeerHashs = new HashSet<String>();
241: private HashSet<String> DHTPeerNames = new HashSet<String>();
242: private HashSet<String> DHTPeerHashs = new HashSet<String>();
243: private int DHTSelectionTargetCount = 0;
244: private int DHTSelectionWordsCount = 0;
245: private int DHTSelectionWordsTimeCount = 0;
246: private double minDHTDist = 1;
247: private double maxDHTDist = 0;
248: private double avgDHTDist = 0;
249: private int busyPeerCount = 0;
250: private int notEnoughDHTPeers = 0;
251: private int failedIndexDistributionCount = 0;
252: private int leftChildTwiceCount = 0;
253: private int rightChildTwiceCount = 0;
254: private int rankingDistributionCount = 0;
255: private int rankingDistributionTime = 0;
256: private int rankingDistributionFailCount = 0;
257: private int malformedURLCount = 0;
258: private int indexedSites = 0;
259: private int indexedWordSum = 0;
260: private int indexedSiteSizeSum = 0;
261: private int indexedAnchorsCount = 0;
262: private int indexedStackingTime = 0;
263: private int indexedParsingTime = 0;
264: private int indexedIndexingTime = 0;
265: private int indexedStorageTime = 0;
266: private long totalParserTime = 0;
267: private int totalParserRuns = 0;
268:
269: public int parse(String logLevel, String logLine) {
270: long start = System.currentTimeMillis();
271: if (logLevel.equals("INFO")) {
272: m = i1.matcher(logLine);
273:
274: if (m.find() && m.groupCount() >= 3) {
275: //System.out.println(m.group(1) + " " + m.group(2) + " " + m.group(3));
276: urlSum += Integer.parseInt(m.group(1));
277: urlTimeSum += Integer.parseInt(m.group(2));
278: blockedURLSum += Integer.parseInt(m.group(3));
279: totalParserTime += (System.currentTimeMillis() - start);
280: totalParserRuns++;
281: return 0;
282: }
283: m = i2.matcher(logLine);
284:
285: if (m.find() && m.groupCount() >= 6) {
286: rwiSum += Integer.parseInt(m.group(1));
287: wordsSum += Integer.parseInt(m.group(2));
288: rwiTimeSum += Integer.parseInt(m.group(3));
289: urlReqSum += Integer.parseInt(m.group(4));
290: blockedRWISum += Integer.parseInt(m.group(6));
291: totalParserTime += (System.currentTimeMillis() - start);
292: totalParserRuns++;
293: return 0;
294: }
295: m = i2_2.matcher(logLine);
296:
297: if (m.find() && m.groupCount() >= 6) {
298: rwiSum += Integer.parseInt(m.group(1));
299: wordsSum += Integer.parseInt(m.group(2));
300: rwiTimeSum += Integer.parseInt(m.group(3));
301: urlReqSum += Integer.parseInt(m.group(4));
302: blockedRWISum += Integer.parseInt(m.group(6));
303: totalParserTime += (System.currentTimeMillis() - start);
304: totalParserRuns++;
305: return 0;
306: }
307: m = i3.matcher(logLine);
308:
309: if (m.find() && m.groupCount() >= 6) {
310: DHTSendTraffic += Integer.parseInt(m.group(6));
311: DHTPeerNames.add(m.group(2));
312: DHTPeerHashs.add(m.group(3));
313: totalParserTime += (System.currentTimeMillis() - start);
314: totalParserRuns++;
315: return 0;
316: }
317: m = i4.matcher(logLine);
318:
319: if (m.find() && m.groupCount() >= 8) {
320: DHTSendTraffic += Integer.parseInt(m.group(8));
321: DHTSendURLs += Integer.parseInt(m.group(3));
322: DHTPeerNames.add(m.group(4));
323: DHTPeerHashs.add(m.group(5));
324: totalParserTime += (System.currentTimeMillis() - start);
325: totalParserRuns++;
326: return 0;
327: }
328: m = i5.matcher(logLine);
329:
330: if (m.find() && m.groupCount() >= 3) {
331: minDHTDist = Math.min(minDHTDist, Double.parseDouble(m
332: .group(3)));
333: maxDHTDist = Math.max(maxDHTDist, Double.parseDouble(m
334: .group(3)));
335: avgDHTDist += Double.parseDouble(m.group(3));
336: DHTSelectionTargetCount++;
337: totalParserTime += (System.currentTimeMillis() - start);
338: totalParserRuns++;
339: return 0;
340: }
341: m = i6.matcher(logLine);
342:
343: if (m.find() && m.groupCount() >= 2) {
344: RWIRejectPeerNames.add(m.group(2));
345: RWIRejectPeerHashs.add(m.group(1));
346: RWIRejectCount++;
347: totalParserTime += (System.currentTimeMillis() - start);
348: totalParserRuns++;
349: return 0;
350: }
351: m = i7.matcher(logLine);
352:
353: if (m.find()) {
354: totalParserTime += (System.currentTimeMillis() - start);
355: totalParserRuns++;
356: return 0;
357: }
358: m = i8.matcher(logLine);
359:
360: if (m.find() && m.groupCount() >= 2) {
361: DHTSelectionWordsCount += Double
362: .parseDouble(m.group(1));
363: DHTSelectionWordsTimeCount += Double.parseDouble(m
364: .group(2));
365: totalParserTime += (System.currentTimeMillis() - start);
366: totalParserRuns++;
367: return 0;
368: }
369: m = i9.matcher(logLine);
370:
371: if (m.find() && m.groupCount() >= 1) {
372: rankingDistributionCount++;
373: rankingDistributionTime += Integer.parseInt(m.group(1));
374: totalParserTime += (System.currentTimeMillis() - start);
375: totalParserRuns++;
376: return 0;
377: }
378: m = i10.matcher(logLine);
379:
380: if (m.find()) {
381: rankingDistributionFailCount++;
382: totalParserTime += (System.currentTimeMillis() - start);
383: totalParserRuns++;
384: return 0;
385: }
386: m = i11.matcher(logLine);
387:
388: if (m.find()) {
389: busyPeerCount++;
390: totalParserTime += (System.currentTimeMillis() - start);
391: totalParserRuns++;
392: return 0;
393: }
394: // m = i12.matcher (logLine);
395: //
396: // if (m.find ()) {
397: // return 3;
398: // }
399: m = i13.matcher(logLine);
400:
401: if (m.find()) {
402: totalParserTime += (System.currentTimeMillis() - start);
403: totalParserRuns++;
404: return 0;
405: }
406: m = adv1.matcher(logLine);
407:
408: if (m.find() && m.groupCount() >= 10) {
409: indexedSites++;
410: indexedWordSum += Integer.parseInt(m.group(1));
411: indexedSiteSizeSum += Integer.parseInt(m.group(5));
412: indexedAnchorsCount += Integer.parseInt(m.group(6));
413: indexedStackingTime += Integer.parseInt(m.group(7));
414: indexedParsingTime += Integer.parseInt(m.group(8));
415: indexedIndexingTime += Integer.parseInt(m.group(9));
416: indexedStorageTime += Integer.parseInt(m.group(10));
417: totalParserTime += (System.currentTimeMillis() - start);
418: totalParserRuns++;
419: return 0;
420: }
421:
422: } else if (logLevel.equals("WARNING")) {
423: m = w1.matcher(logLine);
424:
425: if (m.find()) {
426: notEnoughDHTPeers++;
427: totalParserTime += (System.currentTimeMillis() - start);
428: totalParserRuns++;
429: return 0;
430: }
431: m = w2.matcher(logLine);
432:
433: if (m.find()) {
434: failedIndexDistributionCount++;
435: totalParserTime += (System.currentTimeMillis() - start);
436: totalParserRuns++;
437: return 0;
438: }
439: } else if (logLevel.equals("SEVERE")) {
440: m = e1.matcher(logLine);
441:
442: if (m.find() && m.groupCount() >= 1) {
443: if (m.group(1).equals("leftchild"))
444: leftChildTwiceCount++;
445: else if (m.group(1).equals("rightchild"))
446: rightChildTwiceCount++;
447: totalParserTime += (System.currentTimeMillis() - start);
448: totalParserRuns++;
449: return 0;
450: }
451: m = e2.matcher(logLine);
452:
453: if (m.find()) {
454: malformedURLCount++;
455: totalParserTime += (System.currentTimeMillis() - start);
456: totalParserRuns++;
457: return 0;
458: }
459: }
460: totalParserTime += (System.currentTimeMillis() - start);
461: totalParserRuns++;
462: return -1;
463: }
464:
465: public Hashtable<String, Object> getResults() {
466: Hashtable<String, Object> results = new Hashtable<String, Object>();
467: results.put(PARSER_VERSION, new Double(parserVersion));
468: results.put(URLS_RECEIVED, new Integer(urlSum));
469: results.put(URLS_REQUESTED, new Integer(urlReqSum));
470: results.put(URLS_BLOCKED, new Integer(blockedURLSum));
471: results.put(WORDS_RECEIVED, new Integer(wordsSum));
472: results.put(RWIS_RECEIVED, new Integer(rwiSum));
473: results.put(RWIS_BLOCKED, new Integer(blockedRWISum));
474: results.put(URLS_RECEIVED_TIME, new Long(urlTimeSum));
475: results.put(RWIS_RECEIVED_TIME, new Long(rwiTimeSum));
476: results.put(DHT_TRAFFIC_SENT, new Long(DHTSendTraffic));
477: results.put(DHT_URLS_SENT, new Integer(DHTSendURLs));
478: results.put(DHT_REJECTED, new Integer(RWIRejectCount));
479: results.put(DHT_REJECTED_PEERS_NAME, RWIRejectPeerNames);
480: results.put(DHT_REJECTED_PEERS_HASH, RWIRejectPeerHashs);
481: results.put(DHT_SENT_PEERS_NAME, DHTPeerNames);
482: results.put(DHT_SENT_PEERS_HASH, DHTPeerHashs);
483: results.put(DHT_SELECTED, new Integer(DHTSelectionTargetCount));
484: results.put(DHT_WORDS_SELECTED, new Integer(
485: DHTSelectionWordsCount));
486: results.put(DHT_WORDS_SELECTED_TIME, new Integer(
487: DHTSelectionWordsTimeCount));
488: results.put(DHT_DISTANCE_MIN, new Double(minDHTDist));
489: results.put(DHT_DISTANCE_MAX, new Double(maxDHTDist));
490: results.put(DHT_DISTANCE_AVERAGE, new Double(avgDHTDist
491: / DHTSelectionTargetCount));
492: results.put(PEERS_BUSY, new Integer(busyPeerCount));
493: results.put(PEERS_TOO_LESS, new Integer(notEnoughDHTPeers));
494: results.put(DHT_SENT_FAILED, new Integer(
495: failedIndexDistributionCount));
496: results.put(ERROR_CHILD_TWICE_LEFT, new Integer(
497: leftChildTwiceCount));
498: results.put(ERROR_CHILD_TWICE_RIGHT, new Integer(
499: rightChildTwiceCount));
500: results
501: .put(RANKING_DIST,
502: new Integer(rankingDistributionCount));
503: results.put(RANKING_DIST_TIME, new Integer(
504: rankingDistributionTime));
505: results.put(RANKING_DIST_FAILED, new Integer(
506: rankingDistributionFailCount));
507: results
508: .put(ERROR_MALFORMED_URL,
509: new Integer(malformedURLCount));
510: results.put(INDEXED_SITES, new Integer(indexedSites));
511: results.put(INDEXED_WORDS, new Integer(indexedWordSum));
512: results
513: .put(INDEXED_SITES_SIZE,
514: new Integer(indexedSiteSizeSum));
515: results.put(INDEXED_ANCHORS, new Integer(indexedAnchorsCount));
516: results.put(INDEXED_STACK_TIME,
517: new Integer(indexedStackingTime));
518: results
519: .put(INDEXED_PARSE_TIME,
520: new Integer(indexedParsingTime));
521: results.put(INDEXED_INDEX_TIME,
522: new Integer(indexedIndexingTime));
523: results
524: .put(INDEXED_STORE_TIME,
525: new Integer(indexedStorageTime));
526: results.put(TOTAL_PARSER_TIME, new Long(totalParserTime));
527: results.put(TOTAL_PARSER_RUNS, new Integer(totalParserRuns));
528: return results;
529: }
530:
531: public String getParserType() {
532: return parserType;
533: }
534:
535: public double getParserVersion() {
536: return parserVersion;
537: }
538:
539: public void printResults() {
540: if (rankingDistributionCount == 0)
541: rankingDistributionCount = 1;
542: if (DHTSelectionWordsTimeCount == 0)
543: DHTSelectionWordsTimeCount = 1;
544: if (indexedSites != 0)
545: indexedSites++;
546: System.out.println("INDEXER: Indexed "
547: + indexedSites
548: + " sites in "
549: + (indexedStackingTime + indexedParsingTime
550: + indexedIndexingTime + indexedStorageTime)
551: + " milliseconds.");
552: System.out.println("INDEXER: Indexed " + indexedWordSum
553: + " words on " + indexedSites
554: + " sites. (avg. words per site: "
555: + (indexedWordSum / indexedSites) + ").");
556: System.out.println("INDEXER: Total Size of indexed sites: "
557: + indexedSiteSizeSum + " bytes (avg. size per site: "
558: + (indexedSiteSizeSum / indexedSites) + " bytes).");
559: System.out.println("INDEXER: Total Number of Anchors found: "
560: + indexedAnchorsCount + "(avg. Anchors per site: "
561: + (indexedAnchorsCount / indexedSites) + ").");
562: System.out.println("INDEXER: Total StackingTime: "
563: + indexedStackingTime
564: + " milliseconds (avg. StackingTime: "
565: + (indexedStackingTime / indexedSites)
566: + " milliseconds).");
567: System.out.println("INDEXER: Total ParsingTime: "
568: + indexedParsingTime
569: + " milliseconds (avg. ParsingTime: "
570: + (indexedParsingTime / indexedSites)
571: + " milliseconds).");
572: System.out.println("INDEXER: Total IndexingTime: "
573: + indexedIndexingTime
574: + " milliseconds (avg. IndexingTime: "
575: + (indexedIndexingTime / indexedSites)
576: + " milliseconds).");
577: System.out.println("INDEXER: Total StorageTime: "
578: + indexedStorageTime
579: + " milliseconds (avg. StorageTime: "
580: + (indexedStorageTime / indexedSites)
581: + " milliseconds).");
582: if (urlSum != 0)
583: urlSum++;
584: System.out.println("DHT: Recieved " + urlSum + " Urls in "
585: + urlTimeSum + " ms. Blocked " + blockedURLSum
586: + " URLs.");
587: System.out.println("DHT: " + urlTimeSum / urlSum
588: + " milliseconds per URL.");
589: if (rwiSum != 0)
590: rwiSum++;
591: System.out.println("DHT: Recieved " + rwiSum + " RWIs from "
592: + wordsSum + " Words in " + rwiTimeSum + " ms. "
593: + urlReqSum + " requested URLs.");
594: System.out
595: .println("DHT: Blocked "
596: + blockedRWISum
597: + " RWIs before requesting URLs, because URL-Hash was blacklisted.");
598: System.out.println("DHT: " + rwiTimeSum / rwiSum
599: + " milliseconds per RWI.");
600: System.out.println("DHT: Rejected " + RWIRejectCount
601: + " Indextransfers from " + RWIRejectPeerNames.size()
602: + " PeerNames with " + RWIRejectPeerHashs.size()
603: + " PeerHashs.");
604: System.out.println("DHT: "
605: + ((double) Math.round(DHTSendTraffic * 100
606: / (1024 * 1024))) / 100 + " MegaBytes ("
607: + DHTSendTraffic + " Bytes) of DHT-Transfertraffic.");
608: System.out.println("DHT: Sended " + DHTSendURLs
609: + " URLs via DHT.");
610: System.out.println("DHT: DHT Transfers send to "
611: + DHTPeerNames.size() + " Peernames with "
612: + DHTPeerHashs.size() + " Peerhashs.");
613: System.out.println("DHT: Totally selected "
614: + DHTSelectionWordsCount + " words in "
615: + DHTSelectionWordsTimeCount + " seconds ("
616: + (float) DHTSelectionWordsCount
617: / DHTSelectionWordsTimeCount + " words/s)");
618: System.out.println("DHT: Selected " + DHTSelectionTargetCount
619: + " possible DHT Targets (min. Distance: " + minDHTDist
620: + " max. Distance: " + maxDHTDist + " avg. Distance: "
621: + ((double) avgDHTDist / DHTSelectionTargetCount));
622: System.out
623: .println("DHT: "
624: + busyPeerCount
625: + " times a targetpeer was too busy to accept a transfer.");
626: System.out
627: .println("DHT: "
628: + notEnoughDHTPeers
629: + " times there were not enought targetpeers for the selected DHTChunk");
630: System.out.println("DHT: IndexDistribution failed "
631: + failedIndexDistributionCount + " times.");
632: System.out.println("RANKING: Transmitted "
633: + rankingDistributionCount + " Rankingfiles in "
634: + rankingDistributionTime + " seconds ("
635: + rankingDistributionTime / rankingDistributionCount
636: + " seconds/file)");
637: System.out.println("RANKING: RankingDistribution failed "
638: + rankingDistributionFailCount + " times.");
639: if (leftChildTwiceCount != 0)
640: System.out.println("ERRORS: tried " + leftChildTwiceCount
641: + " times to create leftchild node twice in db");
642: if (rightChildTwiceCount != 0)
643: System.out.println("ERRORS: tried " + rightChildTwiceCount
644: + " times to create rightchild node twice in db");
645: if (malformedURLCount != 0)
646: System.out.println("ERRORS: " + malformedURLCount
647: + " MalformedURLExceptions accord.");
648: }
649:
650: }
|