001: /* $Id: ArchiveFileConstants.java 4620 2006-09-12 19:41:23Z stack-sf $
002: *
003: * Created on August 16th, 2006.
004: *
005: * Copyright (C) 2006 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.io;
024:
/**
 * Shared constants for Archive files and for code that processes them.
 *
 * <p>Fields declared in an interface are implicitly {@code public static
 * final}, so the modifiers are not repeated on each declaration.
 *
 * @author stack
 * @version $Date: 2006-09-12 19:41:23 +0000 (Tue, 12 Sep 2006) $ $Revision: 4620 $
 */
public interface ArchiveFileConstants {

    // ---------------------------------------------------------------
    // File name suffixes and extensions
    // ---------------------------------------------------------------

    /** Suffix carried by a file while it is currently in use. */
    String OCCUPIED_SUFFIX = ".open";

    /** Suffix appended to the name of a 'broken' file. */
    String INVALID_SUFFIX = ".invalid";

    /** Extension of a compressed file, without the leading dot. */
    String COMPRESSED_FILE_EXTENSION = "gz";

    /** {@link #COMPRESSED_FILE_EXTENSION} with a leading dot prepended. */
    String DOT_COMPRESSED_FILE_EXTENSION = "." + COMPRESSED_FILE_EXTENSION;

    // ---------------------------------------------------------------
    // Header field keys
    // ---------------------------------------------------------------

    /** Key of the Archive File version field. */
    String VERSION_FIELD_KEY = "version";

    /** Key of the Archive File length field. */
    String LENGTH_FIELD_KEY = "length";

    /** Key of the Archive File type field. */
    String TYPE_FIELD_KEY = "type";

    /** Key of the Archive File URL field. */
    String URL_FIELD_KEY = "subject-uri";

    /** Key of the Archive File creation date field. */
    String DATE_FIELD_KEY = "creation-date";

    /** Key of the Archive File mimetype field. */
    String MIMETYPE_FIELD_KEY = "content-type";

    /** Key of the Archive File record identifier field. */
    String RECORD_IDENTIFIER_FIELD_KEY = "record-identifier";

    /** Key of the Archive Record's absolute offset into its Archive file. */
    String ABSOLUTE_OFFSET_KEY = "absolute-offset";

    /**
     * Key of the reader identifier field.
     * NOTE(review): what the identifier denotes is not visible in this
     * file -- confirm against the readers that populate it.
     */
    String READER_IDENTIFIER_FIELD_KEY = "reader-identifier";

    // ---------------------------------------------------------------
    // CDX output
    // ---------------------------------------------------------------

    /**
     * Initial capacity for the string buffer that a cdx line is written into.
     *
     * <p>Each large term is a guess at the width of one cdx field and each
     * {@code 1} accounts for the space that separates two fields. The legend
     * used when writing the line is {@code CDX b e a m s c V n g}; the terms
     * below presumably follow that order (TODO confirm against the cdx
     * writer). See the cdx documentation for the meaning of each field.
     */
    int CDX_LINE_BUFFER_SIZE =
              14 + 1
            + 15 + 1
            + 1024 + 1
            + 24 + 1
            + 3 + 1
            + 32 + 1
            + 20 + 1
            + 20 + 1
            + 64;

    // ---------------------------------------------------------------
    // Miscellaneous
    // ---------------------------------------------------------------

    /** Name of the digest method used by default. */
    String DEFAULT_DIGEST_METHOD = "SHA-1";

    /** A single space character. */
    char SINGLE_SPACE = ' ';

    /** Carriage return followed by line feed. */
    String CRLF = "\r\n";

    /*
     * String tokens whose use is not visible in this file.
     * NOTE(review): presumably names of output formats / options accepted
     * by the archive readers -- confirm against callers.
     */

    /** The token {@code "cdx"}. */
    String CDX = "cdx";

    /** The token {@code "dump"}. */
    String DUMP = "dump";

    /** The token {@code "gzipdump"}. */
    String GZIP_DUMP = "gzipdump";

    /** The token {@code "header"}. */
    String HEADER = "header";

    /** The token {@code "nohead"}. */
    String NOHEAD = "nohead";

    /** The token {@code "cdxfile"}. */
    String CDX_FILE = "cdxfile";
}
|