001: // /xml.queues/indexing_p.java
002: // -------------------------------
003: // part of the AnomicHTTPD caching proxy
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2004, 2005
007: // last major change: 28.10.2005
008: // this file is contributed by Alexander Schier
009: //
010: // This program is free software; you can redistribute it and/or modify
011: // it under the terms of the GNU General Public License as published by
012: // the Free Software Foundation; either version 2 of the License, or
013: // (at your option) any later version.
014: //
015: // This program is distributed in the hope that it will be useful,
016: // but WITHOUT ANY WARRANTY; without even the implied warranty of
017: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
018: // GNU General Public License for more details.
019: //
020: // You should have received a copy of the GNU General Public License
021: // along with this program; if not, write to the Free Software
022: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
023: //
024: // Using this software in any meaning (reading, learning, copying, compiling,
025: // running) means that you agree that the Author(s) is (are) not responsible
026: // for cost, loss of data or any harm that may be caused directly or indirectly
027: // by usage of this softare or this documentation. The usage of this software
028: // is on your own risk. The installation and usage (starting/running) of this
029: // software may allow other people or application to access your computer and
030: // any attached devices and is highly dependent on the configuration of the
031: // software which must be done by the user of the software; the author(s) is
032: // (are) also not responsible for proper configuration and usage of the
033: // software, even if provoked by documentation provided together with
034: // the software.
035: //
036: // Any changes to this file according to the GPL as documented in the file
037: // gpl.txt aside this file in the shipment you received can be done to the
038: // lines that follows this copyright notice here, but changes must not be
039: // done inside the copyright notive above. A re-distribution must contain
040: // the intact and unchanged copyright notice.
041: // Contributions and changes to the program code must be marked as such.
042:
043: // You must compile this file with
// javac -classpath .:../classes xml/queues_p.java
045: // if the shell's current path is HTROOT
046:
047: //package xml.queues;
048: package xml;
049:
050: import java.text.SimpleDateFormat;
051: import java.util.ArrayList;
052: import java.util.Date;
053: import java.util.Iterator;
054: import java.util.Locale;
055:
056: import de.anomic.http.httpHeader;
057: import de.anomic.plasma.plasmaCrawlEntry;
058: import de.anomic.plasma.plasmaCrawlNURL;
059: import de.anomic.plasma.plasmaSwitchboard;
060: import de.anomic.plasma.plasmaSwitchboardQueue;
061: import de.anomic.server.serverObjects;
062: import de.anomic.server.serverSwitch;
063: import de.anomic.yacy.yacyCore;
064: import de.anomic.yacy.yacySeed;
065:
066: public class queues_p {
067:
068: public static final String STATE_RUNNING = "running";
069: public static final String STATE_PAUSED = "paused";
070:
071: private static SimpleDateFormat dayFormatter = new SimpleDateFormat(
072: "yyyy/MM/dd", Locale.US);
073:
074: private static String daydate(Date date) {
075: if (date == null)
076: return "";
077: return dayFormatter.format(date);
078: }
079:
080: public static serverObjects respond(httpHeader header,
081: serverObjects post, serverSwitch env) {
082: // return variable that accumulates replacements
083: plasmaSwitchboard sb = (plasmaSwitchboard) env;
084: //wikiCode wikiTransformer = new wikiCode(switchboard);
085: serverObjects prop = new serverObjects();
086: if (post == null || !post.containsKey("html"))
087: prop.setLocalized(false);
088: prop.put("rejected", "0");
089: //int showRejectedCount = 10;
090:
091: yacySeed initiator;
092:
093: //indexing queue
094: prop.putNum("indexingSize", sb.getThread(
095: plasmaSwitchboard.INDEXER).getJobCount()
096: + sb.indexingTasksInProcess.size());
097: prop.putNum("indexingMax", (int) sb.getConfigLong(
098: plasmaSwitchboard.INDEXER_SLOTS, 30));
099: prop.putNum("urlpublictextSize", sb.wordIndex.loadedURL.size());
100: prop.putNum("rwipublictextSize", sb.wordIndex.size());
101: if ((sb.sbQueue.size() == 0)
102: && (sb.indexingTasksInProcess.size() == 0)) {
103: prop.put("list", "0"); //is empty
104: } else {
105: plasmaSwitchboardQueue.Entry pcentry;
106: int inProcessCount = 0;
107: long totalSize = 0;
108: int i = 0; //counter
109: ArrayList<plasmaSwitchboardQueue.Entry> entryList = new ArrayList<plasmaSwitchboardQueue.Entry>();
110:
111: // getting all entries that are currently in process
112: synchronized (sb.indexingTasksInProcess) {
113: inProcessCount = sb.indexingTasksInProcess.size();
114: entryList.addAll(sb.indexingTasksInProcess.values());
115: }
116:
117: // getting all enqueued entries
118: if ((sb.sbQueue.size() > 0)) {
119: Iterator<plasmaSwitchboardQueue.Entry> i1 = sb.sbQueue
120: .entryIterator(false);
121: while (i1.hasNext())
122: entryList.add(i1.next());
123: }
124:
125: int size = (post == null) ? entryList.size() : post.getInt(
126: "num", entryList.size());
127: if (size > entryList.size())
128: size = entryList.size();
129:
130: int ok = 0;
131: for (i = 0; i < size; i++) {
132: boolean inProcess = i < inProcessCount;
133: pcentry = (plasmaSwitchboardQueue.Entry) entryList
134: .get(i);
135: if ((pcentry != null) && (pcentry.url() != null)) {
136: long entrySize = pcentry.size();
137: totalSize += entrySize;
138: initiator = yacyCore.seedDB.getConnected(pcentry
139: .initiator());
140: prop.put("list-indexing_" + i + "_profile",
141: (pcentry.profile() != null) ? pcentry
142: .profile().name() : "deleted");
143: prop.put("list-indexing_" + i + "_initiator",
144: ((initiator == null) ? "proxy" : initiator
145: .getName()));
146: prop.put("list-indexing_" + i + "_depth", pcentry
147: .depth());
148: prop.put("list-indexing_" + i + "_modified",
149: pcentry.getModificationDate());
150: prop.putHTML("list-indexing_" + i + "_anchor",
151: (pcentry.anchorName() == null) ? ""
152: : pcentry.anchorName(), true);
153: prop.putHTML("list-indexing_" + i + "_url", pcentry
154: .url().toNormalform(false, true), true);
155: prop.putNum("list-indexing_" + i + "_size",
156: entrySize);
157: prop.put("list-indexing_" + i + "_inProcess",
158: (inProcess) ? "1" : "0");
159: prop.put("list-indexing_" + i + "_hash", pcentry
160: .urlHash());
161: ok++;
162: }
163: }
164: prop.put("list-indexing", ok);
165: }
166:
167: //loader queue
168: prop.put("loaderSize", Integer.toString(sb.crawlQueues.size()));
169: prop.put("loaderMax", sb.getConfig(
170: plasmaSwitchboard.CRAWLER_THREADS_ACTIVE_MAX, "10"));
171: if (sb.crawlQueues.size() == 0) {
172: prop.put("list-loader", "0");
173: } else {
174: plasmaCrawlEntry[] w = sb.crawlQueues.activeWorker();
175: int count = 0;
176: for (int i = 0; i < w.length; i++) {
177: if (w[i] == null)
178: continue;
179: prop.put("list-loader_" + count + "_profile", w[i]
180: .profileHandle());
181: initiator = yacyCore.seedDB.getConnected(w[i]
182: .initiator());
183: prop.put("list-loader_" + count + "_initiator",
184: ((initiator == null) ? "proxy" : initiator
185: .getName()));
186: prop.put("list-loader_" + count + "_depth", w[i]
187: .depth());
188: prop.putHTML("list-loader_" + count + "_url", w[i]
189: .url().toString(), true);
190: count++;
191: }
192: prop.put("list-loader", count);
193: }
194:
195: //local crawl queue
196: prop.putNum("localCrawlSize", Integer.toString(sb.getThread(
197: plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL).getJobCount()));
198: prop
199: .put(
200: "localCrawlState",
201: sb
202: .crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL) ? STATE_PAUSED
203: : STATE_RUNNING);
204: int stackSize = sb.crawlQueues.noticeURL
205: .stackSize(plasmaCrawlNURL.STACK_TYPE_CORE);
206: addNTable(prop, "list-local", sb.crawlQueues.noticeURL.top(
207: plasmaCrawlNURL.STACK_TYPE_CORE, Math
208: .min(10, stackSize)));
209:
210: //global crawl queue
211: prop.putNum("limitCrawlSize", Integer.toString(sb.crawlQueues
212: .limitCrawlJobSize()));
213: prop.put("limitCrawlState", STATE_RUNNING);
214: stackSize = sb.crawlQueues.noticeURL
215: .stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT);
216:
217: //global crawl queue
218: prop.putNum("remoteCrawlSize", Integer.toString(sb.getThread(
219: plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)
220: .getJobCount()));
221: prop
222: .put(
223: "remoteCrawlState",
224: sb
225: .crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? STATE_PAUSED
226: : STATE_RUNNING);
227: stackSize = sb.crawlQueues.noticeURL
228: .stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT);
229:
230: if (stackSize == 0) {
231: prop.put("list-remote", "0");
232: } else {
233: addNTable(prop, "list-remote", sb.crawlQueues.noticeURL
234: .top(plasmaCrawlNURL.STACK_TYPE_LIMIT, Math.min(10,
235: stackSize)));
236: }
237:
238: // return rewrite properties
239: return prop;
240: }
241:
242: public static final void addNTable(serverObjects prop,
243: String tableName, plasmaCrawlEntry[] crawlerList) {
244:
245: int showNum = 0;
246: plasmaCrawlEntry urle;
247: yacySeed initiator;
248: for (int i = 0; i < crawlerList.length; i++) {
249: urle = crawlerList[i];
250: if ((urle != null) && (urle.url() != null)) {
251: initiator = yacyCore.seedDB.getConnected(urle
252: .initiator());
253: prop.put(tableName + "_" + showNum + "_profile", urle
254: .profileHandle());
255: prop.put(tableName + "_" + showNum + "_initiator",
256: ((initiator == null) ? "proxy" : initiator
257: .getName()));
258: prop.put(tableName + "_" + showNum + "_depth", urle
259: .depth());
260: prop.put(tableName + "_" + showNum + "_modified",
261: daydate(urle.loaddate()));
262: prop.putHTML(tableName + "_" + showNum + "_anchor",
263: urle.name(), true);
264: prop.putHTML(tableName + "_" + showNum + "_url", urle
265: .url().toNormalform(false, true), true);
266: prop.put(tableName + "_" + showNum + "_hash", urle
267: .url().hash());
268: showNum++;
269: }
270: }
271: prop.put(tableName, showNum);
272:
273: }
274: }
|