001: /* PortnumberCriteria
002: *
003: * $Id: PortnumberCriteria.java 3704 2005-07-18 17:30:21Z stack-sf $
004: *
005: * Created on Apr 8, 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.settings.refinements;
026:
027: import org.archive.net.UURI;
028:
029: /**
030: * A refinement criterion that checks if a URI matches a specific port number.
031: * <p/>
032: * If the port number is not known it will try to use the default port number
033: * for the URI's scheme.
034: *
035: * @author John Erik Halse
036: */
037: public class PortnumberCriteria implements Criteria {
038: private int portNumber = 0;
039:
040: /**
041: * Create a new instance of PortnumberCriteria.
042: */
043: public PortnumberCriteria() {
044: super ();
045: }
046:
047: /**
048: * Create a new instance of PortnumberCriteria.
049: *
050: * @param portNumber the port number for this criteria.
051: */
052: public PortnumberCriteria(String portNumber) {
053: setPortNumber(portNumber);
054: }
055:
056: /* (non-Javadoc)
057: * @see org.archive.crawler.settings.refinements.Criteria#isWithinRefinementBounds(org.archive.crawler.datamodel.UURI, int)
058: */
059: public boolean isWithinRefinementBounds(UURI uri) {
060: int port = uri.getPort();
061: if (port < 0) {
062: if (uri.getScheme().equals("http")) {
063: port = 80;
064: } else if (uri.getScheme().equals("https")) {
065: port = 443;
066: }
067: }
068:
069: return (port == portNumber) ? true : false;
070: }
071:
072: /**
073: * Get the port number that is to be checked against a URI.
074: *
075: * @return Returns the portNumber.
076: */
077: public String getPortNumber() {
078: return String.valueOf(portNumber);
079: }
080:
081: /**
082: * Set the port number that is to be checked against a URI.
083: *
084: * @param portNumber The portNumber to set.
085: */
086: public void setPortNumber(String portNumber) {
087: this .portNumber = Integer.parseInt(portNumber);
088: }
089:
090: /* (non-Javadoc)
091: * @see org.archive.crawler.settings.refinements.Criteria#getName()
092: */
093: public String getName() {
094: return "Port number criteria";
095: }
096:
097: /* (non-Javadoc)
098: * @see org.archive.crawler.settings.refinements.Criteria#getDescription()
099: */
100: public String getDescription() {
101: return "Accept URIs on port " + getPortNumber();
102: }
103: }
|