001: /*
002: * <copyright>
003: *
004: * Copyright 1997-2004 BBNT Solutions, LLC
005: * under sponsorship of the Defense Advanced Research Projects
006: * Agency (DARPA).
007: *
008: * You can redistribute this software and/or modify it under the
009: * terms of the Cougaar Open Source License as published on the
010: * Cougaar Open Source Website (www.cougaar.org).
011: *
012: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
013: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
014: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
015: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
016: * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
017: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
018: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
019: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
020: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
021: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
022: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
023: *
024: * </copyright>
025: */
026:
027: package org.cougaar.core.thread;
028:
029: import java.util.TimerTask;
030:
031: import org.cougaar.core.component.ServiceBroker;
032: import org.cougaar.core.node.NodeControlService;
033: import org.cougaar.core.service.LoggingService;
034: import org.cougaar.core.service.ThreadControlService;
035:
036: /**
037: * This plugin periodically scans all the schedulable and prints out
038: * error messages if they are taking a long time to complete. Error
039: * messages are printed when a schedulable is holding a pooled thread
040: * (in run state) at 10, 30, 100, 300 and 1000 seconds.
041: *
042: * This is designed to be a Node-level plugin.
043: */
044: final class RogueThreadDetector extends TimerTask implements
045: ThreadStatusService.Body {
046: private ThreadStatusService statusService;
047: private ThreadControlService controlService;
048: private LoggingService loggingService;
049: private long samplePeriod;
050: private long[] limits;
051: private int warnTime;
052: private int infoTime;
053:
054: RogueThreadDetector(ServiceBroker sb, long samplePeriod) {
055: this .samplePeriod = samplePeriod;
056:
057: // Less ugly than writing a proper math function (really).
058: long too_long = samplePeriod * 2;
059: limits = new long[7];
060: limits[0] = too_long;
061: limits[1] = too_long * 3;
062: limits[2] = too_long * 10;
063: limits[3] = too_long * 30;
064: limits[4] = too_long * 100;
065: limits[5] = too_long * 300;
066: limits[6] = too_long * 1000;
067:
068: NodeControlService ncs = (NodeControlService) sb.getService(
069: this , NodeControlService.class, null);
070:
071: if (ncs == null) {
072: throw new RuntimeException("Unable to obtain service");
073: }
074:
075: ServiceBroker rootsb = ncs.getRootServiceBroker();
076:
077: loggingService = (LoggingService) sb.getService(this ,
078: LoggingService.class, null);
079:
080: statusService = (ThreadStatusService) rootsb.getService(this ,
081: ThreadStatusService.class, null);
082: controlService = (ThreadControlService) rootsb.getService(this ,
083: ThreadControlService.class, null);
084: if (statusService == null) {
085: throw new RuntimeException("Unable to obtain service");
086: }
087: }
088:
089: void setWarnTime(int warnTime) {
090: this .warnTime = warnTime;
091: }
092:
093: void setInfoTime(int infoTime) {
094: this .infoTime = infoTime;
095: }
096:
097: private boolean timeToLog(long deltaT) {
098: for (int i = 0; i < limits.length; i++) {
099: long lowerBound = limits[i];
100: if (deltaT < lowerBound)
101: return false;
102: long upperBound = lowerBound + samplePeriod;
103: if (deltaT < upperBound)
104: return true;
105: }
106: return false;
107: }
108:
109: private String warningMessage(String scheduler,
110: Schedulable schedulable, long elapsed) {
111: int blocking_type = schedulable.getBlockingType();
112: String blocking_excuse = schedulable.getBlockingExcuse();
113: String b_string = SchedulableStatus.statusString(blocking_type,
114: blocking_excuse);
115: return "Schedulable running for too long: Millisec=" + elapsed
116: + " Level=" + scheduler + " Schedulable="
117: + schedulable.getName() + " Client="
118: + schedulable.getConsumer() + " Blocking=" + b_string;
119:
120: }
121:
122: int running = 0;
123: int queued = 0;
124:
125: public void run(String scheduler, Schedulable schedulable) {
126: int state = schedulable.getState();
127: if (state == CougaarThread.THREAD_RUNNING) {
128: running++;
129: long elapsed = System.currentTimeMillis()
130: - schedulable.getTimestamp();
131: if (loggingService.isWarnEnabled() && timeToLog(elapsed)) {
132: if (elapsed >= warnTime)
133: loggingService.warn(warningMessage(scheduler,
134: schedulable, elapsed));
135: else if (loggingService.isInfoEnabled()
136: && elapsed >= infoTime)
137: loggingService.info(warningMessage(scheduler,
138: schedulable, elapsed));
139: }
140: } else if (state == CougaarThread.THREAD_PENDING) {
141: queued++;
142: }
143:
144: }
145:
146: public void run() {
147: running = 0;
148: queued = 0;
149: statusService.iterateOverStatus(this );
150:
151: if (controlService != null && loggingService.isInfoEnabled()) {
152: int max = controlService.maxRunningThreadCount();
153: if (running >= max || queued >= 1) {
154: // running can be > max because the construction of
155: // the status list isn't synchronized.
156: loggingService
157: .info("ThreadService is using all the pooled threads: running="
158: + running + " queued=" + queued);
159: }
160: }
161:
162: }
163: }
|