001: /*
002: * TimespanCriteria
003: *
004: * $Id: TimespanCriteria.java 3704 2005-07-18 17:30:21Z stack-sf $
005: *
006: * Created on Apr 8, 2004
007: *
008: * Copyright (C) 2004 Internet Archive.
009: *
010: * This file is part of the Heritrix web crawler (crawler.archive.org).
011: *
012: * Heritrix is free software; you can redistribute it and/or modify it under the
013: * terms of the GNU Lesser Public License as published by the Free Software
014: * Foundation; either version 2.1 of the License, or any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful, but WITHOUT ANY
017: * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
018: * A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details.
019: *
020: * You should have received a copy of the GNU Lesser Public License along with
021: * Heritrix; if not, write to the Free Software Foundation, Inc., 59 Temple
022: * Place, Suite 330, Boston, MA 02111-1307 USA
023: */
024: package org.archive.crawler.settings.refinements;
025:
026: import java.text.DateFormat;
027: import java.text.ParseException;
028: import java.text.SimpleDateFormat;
029: import java.util.Date;
030: import java.util.TimeZone;
031:
032: import org.archive.net.UURI;
033:
034: /**
035: * A refinement criteria that checks if a URI is requested within a specific
036: * time frame. <p/>
037: *
038: * The timeframe's resolution is minutes and always operates in 24h GMT. The
039: * format is <code>hhmm</code>, exmaples:
040: * <p>
041: * <code> 1200</code> for noon GMT <br>
042: * <code> 1805</code> for 5 minutes past six in the afternoon GMT.
043: *
044: * @author John Erik Halse
045: */
046: public class TimespanCriteria implements Criteria {
047:
048: private static DateFormat timeFormat;
049: static {
050: final TimeZone TZ = TimeZone.getTimeZone("GMT");
051: timeFormat = new SimpleDateFormat("HHmm");
052: timeFormat.setTimeZone(TZ);
053: }
054:
055: private Date from;
056:
057: private Date to;
058:
059: /**
060: * Create a new instance of TimespanCriteria.
061: *
062: * @param from start of the time frame (inclusive).
063: * @param to end of the time frame (inclusive).
064: * @throws ParseException
065: */
066: public TimespanCriteria(String from, String to)
067: throws ParseException {
068: setFrom(from);
069: setTo(to);
070: }
071:
072: public boolean isWithinRefinementBounds(UURI uri) {
073: try {
074: Date now = timeFormat.parse(timeFormat.format(new Date()));
075: if (from.before(to)) {
076: if (now.getTime() >= from.getTime()
077: && now.getTime() <= to.getTime()) {
078: return true;
079: }
080: } else {
081: if (!(now.getTime() > to.getTime() && now.getTime() < from
082: .getTime())) {
083: return true;
084: }
085: }
086: } catch (ParseException e) {
087: // Should never happen since we are only parsing system time at
088: // this place.
089: e.printStackTrace();
090: }
091:
092: return false;
093: }
094:
095: /**
096: * Get the beginning of the time frame to check against.
097: *
098: * @return Returns the from.
099: */
100: public String getFrom() {
101: return timeFormat.format(from);
102: }
103:
104: /**
105: * Set the beginning of the time frame to check against.
106: *
107: * @param from The from to set.
108: * @throws ParseException
109: */
110: public void setFrom(String from) throws ParseException {
111: this .from = timeFormat.parse(from);
112: }
113:
114: /**
115: * Get the end of the time frame to check against.
116: *
117: * @return Returns the to.
118: */
119: public String getTo() {
120: return timeFormat.format(to);
121: }
122:
123: /**
124: * Set the end of the time frame to check against.
125: *
126: * @param to The to to set.
127: * @throws ParseException
128: */
129: public void setTo(String to) throws ParseException {
130: this .to = timeFormat.parse(to);
131: }
132:
133: public boolean equals(Object o) {
134: if (o instanceof TimespanCriteria) {
135: TimespanCriteria other = (TimespanCriteria) o;
136: if (this .from.equals(other.from)
137: && this .to.equals(other.to)) {
138: return true;
139: }
140: }
141: return false;
142: }
143:
144: /* (non-Javadoc)
145: * @see org.archive.crawler.settings.refinements.Criteria#getName()
146: */
147: public String getName() {
148: return "Time of day criteria";
149: }
150:
151: /* (non-Javadoc)
152: * @see org.archive.crawler.settings.refinements.Criteria#getDescription()
153: */
154: public String getDescription() {
155: return "Accept any URIs between the hours of " + getFrom()
156: + "(GMT) and " + getTo() + "(GMT) each day.";
157: }
158: }
|