| org.archive.crawler.datamodel.CoreAttributeConstants
All known Subclasses: org.archive.crawler.extractor.ExtractorCSS, org.archive.crawler.fetcher.FetchHTTP, org.archive.crawler.frontier.WorkQueueFrontier, org.archive.crawler.framework.WriterPoolProcessor, org.archive.crawler.extractor.ExtractorJS, org.archive.crawler.admin.SeedRecord, org.archive.crawler.io.RuntimeErrorFormatter, org.archive.crawler.extractor.ExtractorImpliedURI, org.archive.crawler.datamodel.CandidateURI, org.archive.crawler.processor.recrawl.FetchHistoryProcessor, org.archive.crawler.extractor.ExtractorHTMLTest, org.archive.crawler.util.CrawledBytesHistotable, org.archive.crawler.extractor.ExtractorHTTP, org.archive.crawler.extractor.ExtractorPDF, org.archive.crawler.extractor.JerichoExtractorHTML, org.archive.crawler.writer.ARCWriterProcessor, org.archive.crawler.postprocessor.CrawlStateUpdater, org.archive.crawler.io.LocalErrorFormatter, org.archive.crawler.frontier.AbstractFrontier, org.archive.crawler.extractor.ExtractorDOC, org.archive.crawler.deciderules.recrawl.IdenticalDigestDecideRule, org.archive.crawler.extractor.ExtractorHTML, org.archive.crawler.writer.ExperimentalV10WARCWriterProcessor, org.archive.crawler.extractor.JerichoExtractorHTMLTest, org.archive.crawler.fetcher.FetchDNS, org.archive.crawler.prefetch.PreconditionEnforcer, org.archive.crawler.extractor.ExtractorURI, org.archive.crawler.extractor.ExtractorSWF, org.archive.crawler.frontier.AdaptiveRevisitFrontier, org.archive.crawler.writer.MirrorWriterProcessor, org.archive.crawler.writer.ExperimentalWARCWriterProcessor, org.archive.crawler.fetcher.FetchFTP, org.archive.crawler.extractor.ExtractorXML, org.archive.crawler.io.UriProcessingFormatter, org.archive.crawler.io.UriErrorFormatter, org.archive.crawler.writer.Kw3WriterProcessor, org.archive.crawler.extractor.ExtractorUniversal, org.archive.crawler.framework.ToeThread, org.archive.crawler.deciderules.NotExceedsDocumentLengthTresholdDecideRule, org.archive.crawler.extractor.TrapSuppressExtractor,
CoreAttributeConstants | public interface CoreAttributeConstants (Code) | | CrawlURI attribute keys used by the core crawler
classes.
author: gojomo |
A_ANNOTATIONS | public static String A_ANNOTATIONS(Code) | | shorthand string tokens indicating notable occurrences,
separated by commas
|
A_CONTENT_DIGEST | final public static String A_CONTENT_DIGEST(Code) | | content digest
|
A_CONTENT_TYPE | public static String A_CONTENT_TYPE(Code) | | Extracted MIME type of fetched content; should be
set immediately by fetching module if possible
(rather than waiting for a later analyzer)
|
A_CREDENTIAL_AVATARS_KEY | final public static String A_CREDENTIAL_AVATARS_KEY(Code) | | Key to get credential avatars from A_LIST.
|
A_DELAY_FACTOR | public static String A_DELAY_FACTOR(Code) | | Multiplier of last fetch duration to wait before
fetching another item of the same class (eg host)
|
A_DISTANCE_FROM_SEED | public static String A_DISTANCE_FROM_SEED(Code) | | |
A_DNS_SERVER_IP_LABEL | public static String A_DNS_SERVER_IP_LABEL(Code) | | |
A_ETAG_HEADER | final public static String A_ETAG_HEADER(Code) | | header name (and AList key) for ETag
|
A_FETCH_BEGAN_TIME | public static String A_FETCH_BEGAN_TIME(Code) | | |
A_FETCH_COMPLETED_TIME | public static String A_FETCH_COMPLETED_TIME(Code) | | |
A_FETCH_HISTORY | final public static String A_FETCH_HISTORY(Code) | | fetch history array
|
A_FORCE_RETIRE | final public static String A_FORCE_RETIRE(Code) | | flag indicating the containing queue should be retired
|
A_HERITABLE_KEYS | final public static String A_HERITABLE_KEYS(Code) | | Key to (optional) attribute specifying a list of keys that
are passed to CandidateURIs that 'descend' (are discovered
via) this URI.
|
A_HTTP_PROXY_HOST | final public static String A_HTTP_PROXY_HOST(Code) | | local override of proxy host
|
A_HTTP_PROXY_PORT | final public static String A_HTTP_PROXY_PORT(Code) | | local override of proxy port
|
A_HTTP_TRANSACTION | public static String A_HTTP_TRANSACTION(Code) | | |
A_LAST_MODIFIED_HEADER | final public static String A_LAST_MODIFIED_HEADER(Code) | | header name (and AList key) for last-modified timestamp
|
A_LOCALIZED_ERRORS | public static String A_LOCALIZED_ERRORS(Code) | | |
A_MINIMUM_DELAY | public static String A_MINIMUM_DELAY(Code) | | Minimum delay before fetching another item of the
same class (eg host). Even if lastFetchTime*delayFactor
is less than this, this period will be waited.
|
A_MIRROR_PATH | public static String A_MIRROR_PATH(Code) | | Defined for org.archive.crawler.writer.MirrorWriterProcessor.
|
A_PREREQUISITE_URI | public static String A_PREREQUISITE_URI(Code) | | |
A_REFERENCE_LENGTH | final public static String A_REFERENCE_LENGTH(Code) | | reference length (content length or virtual length)
|
A_RRECORD_SET_LABEL | public static String A_RRECORD_SET_LABEL(Code) | | |
A_RUNTIME_EXCEPTION | public static String A_RUNTIME_EXCEPTION(Code) | | |
A_SOURCE_TAG | public static String A_SOURCE_TAG(Code) | | a 'source' (usu. URI) that's inherited by discovered URIs
|
A_STATUS | final public static String A_STATUS(Code) | | key for status (when in history)
|
TRUNC_SUFFIX | final public static String TRUNC_SUFFIX(Code) | | Fetch truncation codes present in
CrawlURI annotations.
All truncation annotations have a TRUNC_SUFFIX suffix (TODO:
make this for-sure unique, or redo truncation so that a definitive flag
is marked against the CrawlURI).
|
|
|