001: /* $Id: ArchiveRecordHeader.java 4547 2006-08-28 23:44:20Z stack-sf $
002: *
003: * Created on August 21st, 2006
004: *
005: * Copyright (C) 2006 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.io;
024:
025: import java.util.Map;
026: import java.util.Set;
027:
/**
 * Archive Record Header.
 *
 * <p>Read-only view of the metadata that describes a single record in an
 * Archive file: the well-known fields (date, length, url, mimetype, version,
 * offset), keyed access to the header fields, and identifiers for both the
 * record and the reader it came from.
 *
 * <p>Interface methods are implicitly {@code public abstract}, so the
 * modifiers are omitted on every declaration.
 *
 * @author stack
 * @version $Date: 2006-08-28 23:44:20 +0000 (Mon, 28 Aug 2006) $ $Version$
 */
public interface ArchiveRecordHeader {
    /**
     * Get the time when the record was created.
     *
     * @return Date in 14 digit time format (UTC).
     * @see org.archive.util.ArchiveUtils#parse14DigitDate(String)
     */
    String getDate();

    /**
     * Get the length of the record.
     *
     * @return Length of record.
     */
    long getLength();

    /**
     * Get the subject-url of the record.
     *
     * @return Record subject-url.
     */
    String getUrl();

    /**
     * Get the mimetype of the record.
     *
     * @return Record mimetype.
     */
    String getMimetype();

    /**
     * Get the version of the record.
     *
     * @return Record version.
     */
    String getVersion();

    /**
     * Get the position of the record within its Archive file.
     *
     * @return Offset into Archive file at which this record begins.
     */
    long getOffset();

    /**
     * Look up a single header field.
     *
     * @param key Key to use looking up field value.
     * @return Value for passed key, or {@code null} if no such entry.
     */
    Object getHeaderValue(final String key);

    /**
     * Get the names of the header fields.
     *
     * @return Header field name keys.
     */
    Set getHeaderFieldKeys();

    /**
     * Get all header fields.
     *
     * @return Map of header fields.
     */
    Map getHeaderFields();

    /**
     * Get the identifier of the Archive file this record was read from.
     *
     * @return Identifier for current Archive file. Be aware this may not be
     * a file name or file path. It may just be a URL. Depends on how the
     * Archive file was made.
     */
    String getReaderIdentifier();

    /**
     * Get the identifier of this record.
     *
     * @return Identifier for the record. If ARC, the URL + date. If WARC,
     * the GUID assigned.
     */
    String getRecordIdentifier();

    /**
     * Get the digest of this record.
     *
     * @return Digest as String for this record. Only available after the
     * record has been read in totality.
     */
    String getDigest();

    /**
     * Get the offset at which the content begins.
     * For ARCs, it's used to delimit where http headers end and content
     * begins. For WARCs, it's the end of Named Fields before payload starts.
     *
     * @return Offset at which the content begins.
     */
    int getContentBegin();

    /**
     * Redeclares {@link Object#toString()}. Every class already inherits an
     * implementation from {@code Object}, so this declaration does not by
     * itself force implementers to override it.
     *
     * @return String form of this header.
     */
    String toString();
}
|