001: /* Copyright (C) 2003 Internet Archive.
002: *
003: * This file is part of the Heritrix web crawler (crawler.archive.org).
004: *
005: * Heritrix is free software; you can redistribute it and/or modify
006: * it under the terms of the GNU Lesser Public License as published by
007: * the Free Software Foundation; either version 2.1 of the License, or
008: * any later version.
009: *
010: * Heritrix is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser Public License
016: * along with Heritrix; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: *
019: * CrawlHost.java
020: * Created on Aug 5, 2003
021: *
022: * $Header$
023: */
024: package org.archive.crawler.datamodel;
025:
026: import java.io.Serializable;
027: import java.net.InetAddress;
028: import java.util.logging.Level;
029: import java.util.logging.Logger;
030:
031: import org.archive.util.InetAddressUtil;
032:
033: /**
034: * Represents a single remote "host".
035: *
036: * An host is a name for which there is a dns record or an IP-address. This
037: * might be a machine or a virtual host.
038: *
039: * @author gojomo
040: */
041: public class CrawlHost implements Serializable,
042: CrawlSubstats.HasCrawlSubstats {
043:
044: private static final long serialVersionUID = -5494573967890942895L;
045:
046: private static final Logger logger = Logger
047: .getLogger(CrawlHost.class.getName());
048: /** Flag value indicating always-valid IP */
049: public static final long IP_NEVER_EXPIRES = -1;
050: /** Flag value indicating an IP has not yet been looked up */
051: public static final long IP_NEVER_LOOKED_UP = -2;
052: private String hostname;
053: private String countryCode;
054: private InetAddress ip;
055: private long ipFetched = IP_NEVER_LOOKED_UP;
056: protected CrawlSubstats substats = new CrawlSubstats();
057: /**
058: * TTL gotten from dns record.
059: *
060: * From rfc2035:
061: * <pre>
062: * TTL a 32 bit unsigned integer that specifies the time
063: * interval (in seconds) that the resource record may be
064: * cached before it should be discarded. Zero values are
065: * interpreted to mean that the RR can only be used for the
066: * transaction in progress, and should not be cached.
067: * </pre>
068: */
069: private long ipTTL = IP_NEVER_LOOKED_UP;
070:
071: // Used when bandwith constraint are used
072: private long earliestNextURIEmitTime = 0;
073:
074: /**
075: * Create a new CrawlHost object.
076: *
077: * @param hostname the host name for this host.
078: */
079: public CrawlHost(String hostname) {
080: this (hostname, null);
081: }
082:
083: /**
084: * Create a new CrawlHost object.
085: *
086: * @param hostname the host name for this host.
087: * @param countryCode the country code for this host.
088: */
089: public CrawlHost(String hostname, String countryCode) {
090: this .hostname = hostname;
091: this .countryCode = countryCode;
092: InetAddress tmp = InetAddressUtil.getIPHostAddress(hostname);
093: if (tmp != null) {
094: setIP(tmp, IP_NEVER_EXPIRES);
095: }
096: }
097:
098: /** Return true if the IP for this host has been looked up.
099: *
100: * Returns true even if the lookup failed.
101: *
102: * @return true if the IP for this host has been looked up.
103: */
104: public boolean hasBeenLookedUp() {
105: return ipFetched != IP_NEVER_LOOKED_UP;
106: }
107:
108: /**
109: * Set the IP address for this host.
110: *
111: * @param address
112: * @param ttl the TTL from the dns record in seconds or -1 if it should live
113: * forever (is a numeric IP).
114: */
115: public void setIP(InetAddress address, long ttl) {
116: this .ip = address;
117: // Assume that a lookup as occurred by the time
118: // a caller decides to set this (even to null)
119: this .ipFetched = System.currentTimeMillis();
120: this .ipTTL = ttl;
121: if (logger.isLoggable(Level.FINE)) {
122: logger
123: .fine(hostname
124: + ": "
125: + ((address != null) ? address.toString()
126: : "null"));
127: }
128: }
129:
130: /** Get the IP address for this host.
131: *
132: * @return the IP address for this host.
133: */
134: public InetAddress getIP() {
135: return ip;
136: }
137:
138: /** Get the time when the IP address for this host was last looked up.
139: *
140: * @return the time when the IP address for this host was last looked up.
141: */
142: public long getIpFetched() {
143: return ipFetched;
144: }
145:
146: /**
147: * Get the TTL value from the dns record for this host.
148: *
149: * @return the TTL value from the dns record for this host -- in seconds --
150: * or -1 if this lookup should be valid forever (numeric ip).
151: */
152: public long getIpTTL() {
153: return this .ipTTL;
154: }
155:
156: public String toString() {
157: return "CrawlHost<" + hostname + "(ip:" + ip + ")>";
158: }
159:
160: /**
161: * Get the host name.
162: * @return Returns the host name.
163: */
164: public String getHostName() {
165: return hostname;
166: }
167:
168: /**
169: * Get the earliest time a URI for this host could be emitted.
170: * This only has effect if constraints on bandwidth per host is set.
171: *
172: * @return Returns the earliestNextURIEmitTime.
173: */
174: public long getEarliestNextURIEmitTime() {
175: return earliestNextURIEmitTime;
176: }
177:
178: /**
179: * Set the earliest time a URI for this host could be emitted.
180: * This only has effect if constraints on bandwidth per host is set.
181: *
182: * @param earliestNextURIEmitTime The earliestNextURIEmitTime to set.
183: */
184: public void setEarliestNextURIEmitTime(long earliestNextURIEmitTime) {
185: this .earliestNextURIEmitTime = earliestNextURIEmitTime;
186: }
187:
188: /**
189: * Get country code of this host
190: *
191: * @return Retruns country code or null if not availabe
192: */
193: public String getCountryCode() {
194: return countryCode;
195: }
196:
197: /**
198: * Set country code for this hos
199: *
200: * @param countryCode The country code of this host
201: */
202: public void setCountryCode(String countryCode) {
203: this .countryCode = countryCode;
204: }
205:
206: /* (non-Javadoc)
207: * @see org.archive.crawler.datamodel.CrawlSubstats.HasCrawlSubstats#getSubstats()
208: */
209: public CrawlSubstats getSubstats() {
210: return substats;
211: }
212: }
|