001: /* Credential
002: *
003: * Created on Apr 1, 2004
004: *
005: * Copyright (C) 2004 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.crawler.datamodel.credential;
024:
025: import java.util.Iterator;
026: import java.util.Set;
027: import java.util.logging.Logger;
028:
029: import javax.management.Attribute;
030: import javax.management.AttributeNotFoundException;
031: import javax.management.InvalidAttributeValueException;
032:
033: import org.apache.commons.httpclient.HttpClient;
034: import org.apache.commons.httpclient.HttpMethod;
035: import org.archive.crawler.datamodel.CrawlURI;
036: import org.archive.crawler.framework.CrawlController;
037: import org.archive.crawler.settings.CrawlerSettings;
038: import org.archive.crawler.settings.ModuleType;
039: import org.archive.crawler.settings.SimpleType;
040: import org.archive.crawler.settings.Type;
041:
042: /**
043: * Credential type.
044: *
 * Let this also be a credential in the JAAS sense, in that this is what
046: * gets added to a subject on successful authentication since it contains
047: * data needed to authenticate (realm, login, password, etc.).
048: *
049: * <p>Settings system assumes that subclasses implement a constructor that
050: * takes a name only.
051: *
052: * @author stack
053: * @version $Revision: 2993 $, $Date: 2005-01-04 02:24:59 +0000 (Tue, 04 Jan 2005) $
054: */
055: public abstract class Credential extends ModuleType {
056:
057: private static final Logger logger = Logger
058: .getLogger(Credential.class.getName());
059:
060: private static final String ATTR_CREDENTIAL_DOMAIN = "credential-domain";
061:
062: /**
063: * Constructor.
064: *
065: * @param name Name of this credential.
066: * @param description Descrtiption of this particular credential.
067: */
068: public Credential(String name, String description) {
069: super (name, description);
070: Type t = addElementToDefinition(new SimpleType(
071: ATTR_CREDENTIAL_DOMAIN,
072: "The root domain this credential goes against:"
073: + " E.g. www.archive.org", ""));
074: t.setOverrideable(false);
075: t.setExpertSetting(true);
076: }
077:
078: /**
079: * @param context Context to use when searching for credential domain.
080: * @return The domain/root URI this credential is to go against.
081: * @throws AttributeNotFoundException If attribute not found.
082: */
083: public String getCredentialDomain(CrawlURI context)
084: throws AttributeNotFoundException {
085: return (String) getAttribute(ATTR_CREDENTIAL_DOMAIN, context);
086: }
087:
088: /**
089: * @param context Context to use when searching for credential domain.
090: * @param domain New domain.
091: * @throws AttributeNotFoundException
092: * @throws InvalidAttributeValueException
093: */
094: public void setCredentialDomain(CrawlerSettings context,
095: String domain) throws InvalidAttributeValueException,
096: AttributeNotFoundException {
097: setAttribute(context, new Attribute(ATTR_CREDENTIAL_DOMAIN,
098: domain));
099: }
100:
101: /**
102: * Attach this credentials avatar to the passed <code>curi</code> .
103: *
104: * Override if credential knows internally what it wants to attach as
105: * payload. Otherwise, if payload is external, use the below
106: * {@link #attach(CrawlURI, String)}.
107: *
108: * @param curi CrawlURI to load with credentials.
109: */
110: public void attach(CrawlURI curi) {
111: attach(curi, null);
112: }
113:
114: /**
115: * Attach this credentials avatar to the passed <code>curi</code> .
116: *
117: * @param curi CrawlURI to load with credentials.
118: * @param payload Payload to carry in avatar. Usually credentials.
119: */
120: public void attach(CrawlURI curi, String payload) {
121: CredentialAvatar ca = null;
122: try {
123: ca = (payload == null) ? new CredentialAvatar(this
124: .getClass(), getKey(curi)) : new CredentialAvatar(
125: this .getClass(), getKey(curi), payload);
126: curi.addCredentialAvatar(ca);
127: } catch (AttributeNotFoundException e) {
128: logger.severe("Failed attach of " + this + " for " + curi);
129: }
130: }
131:
132: /**
133: * Detach this credential from passed curi.
134: *
135: * @param curi
136: * @return True if we detached a Credential reference.
137: */
138: public boolean detach(CrawlURI curi) {
139: boolean result = false;
140: if (!curi.hasCredentialAvatars()) {
141: logger.severe("This curi " + curi
142: + " has no cred when it should");
143: } else {
144: Set avatars = curi.getCredentialAvatars();
145: for (Iterator i = avatars.iterator(); i.hasNext();) {
146: CredentialAvatar ca = (CredentialAvatar) i.next();
147: try {
148: if (ca.match(getClass(), getKey(curi))) {
149: result = curi.removeCredentialAvatar(ca);
150: }
151: } catch (AttributeNotFoundException e) {
152: logger.severe("Failed detach of " + ca + " from "
153: + curi);
154: }
155: }
156: }
157: return result;
158: }
159:
160: /**
161: * Detach all credentials of this type from passed curi.
162: *
163: * @param curi
164: * @return True if we detached references.
165: */
166: public boolean detachAll(CrawlURI curi) {
167: boolean result = false;
168: if (!curi.hasCredentialAvatars()) {
169: logger.severe("This curi " + curi
170: + " has no creds when it should.");
171: } else {
172: Set avatars = curi.getCredentialAvatars();
173: for (Iterator i = avatars.iterator(); i.hasNext();) {
174: CredentialAvatar ca = (CredentialAvatar) i.next();
175: if (ca.match(getClass())) {
176: result = curi.removeCredentialAvatar(ca);
177: }
178: }
179: }
180: return result;
181: }
182:
183: /**
184: * @param curi CrawlURI to look at.
185: * @return True if this credential IS a prerequisite for passed
186: * CrawlURI.
187: */
188: public abstract boolean isPrerequisite(CrawlURI curi);
189:
190: /**
191: * @param curi CrawlURI to look at.
192: * @return True if this credential HAS a prerequisite for passed CrawlURI.
193: */
194: public abstract boolean hasPrerequisite(CrawlURI curi);
195:
196: /**
197: * Return the authentication URI, either absolute or relative, that serves
198: * as prerequisite the passed <code>curi</code>.
199: *
200: * @param curi CrawlURI to look at.
201: * @return Prerequisite URI for the passed curi.
202: */
203: public abstract String getPrerequisite(CrawlURI curi);
204:
205: /**
206: * @param context Context to use when searching for credential domain.
207: * @return Key that is unique to this credential type.
208: * @throws AttributeNotFoundException
209: */
210: public abstract String getKey(CrawlURI context)
211: throws AttributeNotFoundException;
212:
213: /**
214: * @return True if this credential is of the type that needs to be offered
215: * on each visit to the server (e.g. Rfc2617 is such a type).
216: */
217: public abstract boolean isEveryTime();
218:
219: /**
220: * @param curi CrawlURI to as for context.
221: * @param http Instance of httpclient.
222: * @param method Method to populate.
223: * @param payload Avatar payload to use populating the method.
224: * @return True if added a credentials.
225: */
226: public abstract boolean populate(CrawlURI curi, HttpClient http,
227: HttpMethod method, String payload);
228:
229: /**
230: * @param curi CrawlURI to look at.
231: * @return True if this credential is to be posted. Return false if the
232: * credential is to be GET'd or if POST'd or GET'd are not pretinent to this
233: * credential type.
234: */
235: public abstract boolean isPost(CrawlURI curi);
236:
237: /**
238: * Test passed curi matches this credentials rootUri.
239: * @param controller
240: * @param curi CrawlURI to test.
241: * @return True if domain for credential matches that of the passed curi.
242: */
243: public boolean rootUriMatch(CrawlController controller,
244: CrawlURI curi) {
245: String cd = null;
246: try {
247: cd = getCredentialDomain(curi);
248: } catch (AttributeNotFoundException e) {
249: logger.severe("Failed to get credential domain " + curi
250: + ": " + e.getMessage());
251: }
252:
253: // TODO: Account for port. Currently we do not distingush between
254: // http and https; they both get same crawl server instance.
255: String serverName = controller.getServerCache().getServerFor(
256: curi).getName();
257: logger.fine("RootURI: Comparing " + serverName + " " + cd);
258: return cd != null && serverName != null
259: && serverName.equalsIgnoreCase(cd);
260: }
261: }
|