001: /* Copyright (C) 2003 Internet Archive.
002: *
003: * This file is part of the Heritrix web crawler (crawler.archive.org).
004: *
005: * Heritrix is free software; you can redistribute it and/or modify
006: * it under the terms of the GNU Lesser Public License as published by
007: * the Free Software Foundation; either version 2.1 of the License, or
008: * any later version.
009: *
010: * Heritrix is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser Public License
016: * along with Heritrix; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: *
019: * ToePool.java
020: * Created on Oct 1, 2003
021: *
022: * $Header$
023: */
024: package org.archive.crawler.framework;
025:
026: import java.io.PrintWriter;
027: import java.util.Date;
028: import java.util.Iterator;
029: import java.util.TreeSet;
030:
031: import org.archive.util.ArchiveUtils;
032: import org.archive.util.Histotable;
033: import org.archive.util.Reporter;
034:
035: /**
036: * A collection of ToeThreads. The class manages the ToeThreads currently
037: * running. Including increasing and decreasing their number, keeping track
038: * of their state and it can be used to kill hung threads.
039: *
040: * @author Gordon Mohr
041: * @author Kristinn Sigurdsson
042: *
043: * @see org.archive.crawler.framework.ToeThread
044: */
045: public class ToePool extends ThreadGroup implements Reporter {
046: /** run worker thread slightly lower than usual */
047: public static int DEFAULT_TOE_PRIORITY = Thread.NORM_PRIORITY - 1;
048:
049: protected CrawlController controller;
050: protected int nextSerialNumber = 1;
051: protected int targetSize = 0;
052:
053: /**
054: * Constructor. Creates a pool of ToeThreads.
055: *
056: * @param c A reference to the CrawlController for the current crawl.
057: */
058: public ToePool(CrawlController c) {
059: super ("ToeThreads");
060: this .controller = c;
061: setDaemon(true);
062: }
063:
064: public void cleanup() {
065: this .controller = null;
066: }
067:
068: /**
069: * @return The number of ToeThreads that are not available (Approximation).
070: */
071: public int getActiveToeCount() {
072: Thread[] toes = getToes();
073: int count = 0;
074: for (int i = 0; i < toes.length; i++) {
075: if ((toes[i] instanceof ToeThread)
076: && ((ToeThread) toes[i]).isActive()) {
077: count++;
078: }
079: }
080: return count;
081: }
082:
083: /**
084: * @return The number of ToeThreads. This may include killed ToeThreads
085: * that were not replaced.
086: */
087: public int getToeCount() {
088: Thread[] toes = getToes();
089: int count = 0;
090: for (int i = 0; i < toes.length; i++) {
091: if ((toes[i] instanceof ToeThread)) {
092: count++;
093: }
094: }
095: return count;
096: }
097:
098: private Thread[] getToes() {
099: Thread[] toes = new Thread[activeCount() + 10];
100: this .enumerate(toes);
101: return toes;
102: }
103:
104: /**
105: * Change the number of ToeThreads.
106: *
107: * @param newsize The new number of ToeThreads.
108: */
109: public void setSize(int newsize) {
110: targetSize = newsize;
111: int difference = newsize - getToeCount();
112: if (difference > 0) {
113: // must create threads
114: for (int i = 1; i <= difference; i++) {
115: startNewThread();
116: }
117: } else {
118: // must retire extra threads
119: int retainedToes = targetSize;
120: Thread[] toes = this .getToes();
121: for (int i = 0; i < toes.length; i++) {
122: if (!(toes[i] instanceof ToeThread)) {
123: continue;
124: }
125: retainedToes--;
126: if (retainedToes >= 0) {
127: continue; // this toe is spared
128: }
129: // otherwise:
130: ToeThread tt = (ToeThread) toes[i];
131: tt.retire();
132: }
133: }
134: }
135:
136: /**
137: * Kills specified thread. Killed thread can be optionally replaced with a
138: * new thread.
139: *
140: * <p><b>WARNING:</b> This operation should be used with great care. It may
141: * destabilize the crawler.
142: *
143: * @param threadNumber Thread to kill
144: * @param replace If true then a new thread will be created to take the
145: * killed threads place. Otherwise the total number of threads
146: * will decrease by one.
147: */
148: public void killThread(int threadNumber, boolean replace) {
149:
150: Thread[] toes = getToes();
151: for (int i = 0; i < toes.length; i++) {
152: if (!(toes[i] instanceof ToeThread)) {
153: continue;
154: }
155: ToeThread toe = (ToeThread) toes[i];
156: if (toe.getSerialNumber() == threadNumber) {
157: toe.kill();
158: }
159: }
160:
161: if (replace) {
162: // Create a new toe thread to take its place. Replace toe
163: startNewThread();
164: }
165: }
166:
167: private synchronized void startNewThread() {
168: ToeThread newThread = new ToeThread(this , nextSerialNumber++);
169: newThread.setPriority(DEFAULT_TOE_PRIORITY);
170: newThread.start();
171: }
172:
173: /**
174: * @return Instance of CrawlController.
175: */
176: public CrawlController getController() {
177: return controller;
178: }
179:
180: //
181: // Reporter implementation
182: //
183:
184: public static String STANDARD_REPORT = "standard";
185: public static String COMPACT_REPORT = "compact";
186: protected static String[] REPORTS = { STANDARD_REPORT,
187: COMPACT_REPORT };
188:
189: public String[] getReports() {
190: return REPORTS;
191: }
192:
193: public void reportTo(String name, PrintWriter writer) {
194: if (COMPACT_REPORT.equals(name)) {
195: compactReportTo(writer);
196: return;
197: }
198: if (name != null && !STANDARD_REPORT.equals(name)) {
199: writer.print(name);
200: writer.print(" not recognized: giving standard report/n");
201: }
202: standardReportTo(writer);
203: }
204:
205: /* (non-Javadoc)
206: * @see org.archive.util.Reporter#reportTo(java.io.Writer)
207: */
208: protected void standardReportTo(PrintWriter writer) {
209: writer.print("Toe threads report - "
210: + ArchiveUtils.get12DigitDate() + "\n");
211: writer
212: .print(" Job being crawled: "
213: + this .controller.getOrder()
214: .getCrawlOrderName() + "\n");
215: writer.print(" Number of toe threads in pool: " + getToeCount()
216: + " (" + getActiveToeCount() + " active)\n");
217:
218: Thread[] toes = this .getToes();
219: synchronized (toes) {
220: for (int i = 0; i < toes.length; i++) {
221: if (!(toes[i] instanceof ToeThread)) {
222: continue;
223: }
224: ToeThread tt = (ToeThread) toes[i];
225: if (tt != null) {
226: writer.print(" ToeThread #"
227: + tt.getSerialNumber() + "\n");
228: tt.reportTo(writer);
229: }
230: }
231: }
232: }
233:
234: /* (non-Javadoc)
235: * @see org.archive.util.Reporter#reportTo(java.io.Writer)
236: */
237: protected void compactReportTo(PrintWriter writer) {
238: writer.print(getToeCount() + " threads (" + getActiveToeCount()
239: + " active)\n");
240:
241: Thread[] toes = this .getToes();
242: boolean legendWritten = false;
243: // TODO: sort by activity: those with curi the longest at front
244: synchronized (toes) {
245: for (int i = 0; i < toes.length; i++) {
246: if (!(toes[i] instanceof ToeThread)) {
247: continue;
248: }
249: ToeThread tt = (ToeThread) toes[i];
250: if (tt != null) {
251: if (!legendWritten) {
252: writer.println(tt.singleLineLegend());
253: legendWritten = true;
254: }
255: tt.singleLineReportTo(writer);
256: }
257: }
258: }
259: }
260:
261: public void singleLineReportTo(PrintWriter w) {
262: Histotable<Object> ht = new Histotable<Object>();
263: Thread[] toes = getToes();
264: for (int i = 0; i < toes.length; i++) {
265:
266: if (!(toes[i] instanceof ToeThread)) {
267: continue;
268: }
269: ToeThread tt = (ToeThread) toes[i];
270: if (tt != null) {
271: ht.tally(tt.getStep());
272: }
273: }
274: TreeSet sorted = ht.getSortedByCounts();
275: w.print(getToeCount());
276: w.print(" threads: ");
277: w.print(Histotable.entryString(sorted.first()));
278: if (sorted.size() > 1) {
279: Iterator iter = sorted.iterator();
280: iter.next();
281: w.print("; ");
282: w.print(Histotable.entryString(iter.next()));
283: }
284: if (sorted.size() > 2) {
285: w.print("; etc...");
286: }
287: }
288:
289: /* (non-Javadoc)
290: * @see org.archive.util.Reporter#singleLineLegend()
291: */
292: public String singleLineLegend() {
293: return "total: mostCommonStateTotal secondMostCommonStateTotal";
294: }
295:
296: public String singleLineReport() {
297: return ArchiveUtils.singleLineReport(this );
298: }
299:
300: public void reportTo(PrintWriter writer) {
301: reportTo(null, writer);
302: }
303: }
|