001: // DNSResolver.java
002: // $Id: DNSResolver.java,v 1.5 2000/08/16 21:37:50 ylafon Exp $
003: // (c) COPYRIGHT MIT, INRIA and Keio, 1996-1999.
004: // Please first read the full copyright statement in file COPYRIGHT.html
005:
006: // a sample CLF/ELF log file name resolver post processing tool
007: // @author Yves Lafon <ylafon@w3.org>
008:
009: package org.w3c.tools.log;
010:
011: import java.util.Date;
012: import java.util.Hashtable;
013:
014: import java.io.BufferedReader;
015: import java.io.FileInputStream;
016: import java.io.FileOutputStream;
017: import java.io.IOException;
018: import java.io.ObjectInputStream;
019: import java.io.ObjectOutputStream;
020: import java.io.PrintStream;
021:
022: import java.net.InetAddress;
023: import java.net.UnknownHostException;
024:
025: import java.text.DateFormat;
026: import java.text.SimpleDateFormat;
027:
028: import org.w3c.util.ThreadCache;
029:
030: public class DNSResolver {
031:
032: /**
033: * the thread in charge of doing DNS resolution
034: * It works better if the getHostName() call is not blocking
035: * the whole JVM :)
036: */
037:
038: private class ResolverThread implements Runnable {
039: String line;
040: DateFormat dfp; // date format parser
041:
042: public void run() {
043: boolean ok;
044: String host;
045: DNSEntry entry;
046: String res;
047: String ip_str = line.substring(0, line.indexOf(' '));
048: byte a[] = ip_str.getBytes();
049:
050: if (resolve) {
051: ok = true;
052: for (int i = 0; ok && i < a.length; i++) {
053: if (a[i] == '.')
054: continue;
055: if (a[i] < '0' || a[i] > '9')
056: ok = false;
057: }
058: } else
059: ok = false;
060: if (!ok) {
061: try {
062: // probably a resolved line, print and continue :)
063: if (dfp != null) {
064: String date_str = line.substring(line
065: .indexOf('[') + 1, line.indexOf(']'));
066: long stamp = 0;
067: try {
068: stamp = dfp.parse(date_str).getTime();
069: } catch (Exception ex) {
070: // invalid date, should we skip? use 0 as a default...
071: }
072: // rewrite the log entry :)
073: synchronized (System.out) {
074: System.out.println(Long.toString(stamp)
075: + " " + line);
076: }
077: } else {
078: synchronized (System.out) {
079: System.out.println(line);
080: }
081: }
082: } catch (Exception parsex) {
083: // exit cleanly
084: }
085: return;
086: }
087: // ok so it is a REAL ip string :)
088: host = (String) badHosts.get(ip_str);
089: if (host == null) {
090: entry = (DNSEntry) hosts.get(ip_str);
091:
092: if (entry == null || !entry.isResolved()) {
093: try {
094: host = InetAddress.getByName(ip_str)
095: .getHostName();
096: if (host.equals(ip_str)) {
097: badHosts.put(ip_str, ip_str);
098: if (entry != null)
099: entry.notFound();
100: else {
101: entry = new DNSEntry(ip_str, false);
102: hosts.put(ip_str, entry);
103: }
104: } else {
105: if (entry != null)
106: entry.setHost(host);
107: else {
108: entry = new DNSEntry(host);
109: hosts.put(ip_str, entry);
110: }
111: }
112: } catch (UnknownHostException uhe) {
113: host = ip_str;
114: badHosts.put(ip_str, ip_str);
115: if (entry != null)
116: entry.notFound();
117: else {
118: entry = new DNSEntry(ip_str, false);
119: hosts.put(ip_str, entry);
120: }
121: }
122: } else
123: host = entry.host;
124: }
125:
126: // ok, now we have the host :)
127: res = line.substring(line.indexOf(' '));
128: if (dfp != null) {
129: // and add the timestamp!
130: String date_str = res.substring(res.indexOf('[') + 1,
131: res.indexOf(']'));
132: long stamp = 0;
133: try {
134: stamp = dfp.parse(date_str).getTime();
135: } catch (Exception ex) {
136: // invalid date, should we skip? use 0 as a default...
137: }
138: // rewrite the log entry :)
139: synchronized (System.out) {
140: System.out.println(Long.toString(stamp) + " "
141: + host + res);
142: }
143: } else {
144: // rewrite the log entry :)
145: synchronized (System.out) {
146: System.out.println(host + res);
147: }
148: }
149: }
150:
151: /**
152: * create a new resolver thread, with the full ECLF entry
153: */
154: ResolverThread(String line, boolean timestamp) {
155: this .line = line;
156: dfp = (timestamp) ? new SimpleDateFormat(
157: "dd/MMM/yyyy:HH:mm:ss z") : null;
158: }
159: }
160:
161: private BufferedReader bf;
162: private Hashtable hosts;
163: private Hashtable badHosts;
164: private ThreadCache threadCache;
165: private boolean timestamp;
166: private boolean resolve;
167: private int cacheSize;
168:
169: /**
170: * the main loop, works on the reader provided at initialization
171: */
172:
173: public void readLog() {
174: String read;
175: boolean ok, done;
176: int pos, qpos;
177: String host;
178: String pass_date;
179: String request;
180: int resp_code;
181: int resp_size;
182: String referer = null;
183: String user_agent = null;
184: String res;
185: String tmp;
186: DNSEntry entry;
187:
188: try {
189: while ((read = bf.readLine()) != null) {
190: done = false;
191: if (read.length() < 40) { // remove bad lines
192: continue;
193: }
194: ResolverThread rt = new ResolverThread(read, timestamp);
195: if (!threadCache.getThread(rt, true)) {
196: System.err
197: .println("*** unable to process :" + read);
198: }
199: }
200: threadCache.waitForCompletion();
201: // save the hashtable in a file
202: FileOutputStream fileOut = null;
203: try {
204: fileOut = new FileOutputStream("dns.oj");
205: ObjectOutputStream out = null;
206: try {
207: out = new ObjectOutputStream(fileOut);
208: out.writeObject(hosts);
209: } catch (Exception e) {
210: } finally {
211: try {
212: out.close();
213: } catch (Exception e) {
214: }
215: }
216: } catch (Exception e) {
217: } finally {
218: try {
219: fileOut.close();
220: } catch (Exception e) {
221: }
222: }
223:
224: } catch (IOException ex) {
225: }
226: }
227:
228: /**
229: * create a new Resovler engine
230: * @param bf, a buffered reader, the log source
231: * @param cacheSize the number of threads used to do resolution
232: * @param timestamp if set, the resolver will add a numeric timestamp
233: * useful to sort mixed entries
234: * @param resolve if set, it will do DNS resolution of the entry
235: */
236:
237: public DNSResolver(BufferedReader bf, int cacheSize,
238: boolean timestamp, boolean resolve) {
239: this .bf = bf;
240: this .resolve = resolve;
241: this .badHosts = new Hashtable(201);
242: this .timestamp = timestamp;
243: this .cacheSize = cacheSize;
244:
245: // load the hashtable from a file.
246: FileInputStream fileIn = null;
247: try {
248: fileIn = new FileInputStream("dns.oj");
249: ObjectInputStream in = null;
250: try {
251: in = new ObjectInputStream(fileIn);
252: hosts = (Hashtable) in.readObject();
253: } catch (Exception e) {
254: hosts = new Hashtable(1001);
255: } finally {
256: try {
257: in.close();
258: } catch (Exception e) {
259: }
260: }
261: } catch (Exception e) {
262: hosts = new Hashtable(1001);
263: } finally {
264: try {
265: fileIn.close();
266: } catch (Exception e) {
267: }
268: }
269: threadCache = new ThreadCache("resolver");
270: threadCache.setThreadPriority(5);
271: threadCache.setCachesize(cacheSize);
272: threadCache.initialize();
273: }
274:
275: public DNSResolver(BufferedReader bf, int cacheSize) {
276: this (bf, cacheSize, false, true);
277: }
278:
279: public DNSResolver(BufferedReader bf, boolean timestamp,
280: boolean resolve) {
281: this (bf, 50, timestamp, resolve);
282: }
283:
284: public DNSResolver(BufferedReader bf) {
285: this (bf, 50, false, true);
286: }
287: }
|