001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.query.resultio.binary;
007:
008: import java.io.DataInput;
009: import java.io.DataOutput;
010:
/**
 * Interface defining constants for the binary table result format. Files in
 * this format consist of a header followed by zero or more records. Data fields
 * are encoded as specified in the interfaces {@link DataInput} and
 * {@link DataOutput}, except for the encoding of string values. String values
 * are encoded in UTF-8 and are preceded by a 32-bit integer specifying the
 * length in bytes of this UTF-8 encoded string.
 * <p>
 * The file header is 13 bytes long:
 * <ul>
 * <li>Bytes 1-4 contain the ASCII codes for the string "BRTR", which stands
 * for Binary RDF Table Result.</li>
 * <li>Bytes 5-8 specify the format version (an integer).</li>
 * <li>Byte 9 specifies some flags, specifically 'distinct' and 'ordered'.</li>
 * <li>Bytes 10-13 specify the number of columns of the query result that will
 * follow (an integer).</li>
 * </ul>
 * Following this are the column headers, which are encoded as UTF-8 strings.
 * There are as many column headers as the number of columns that has been
 * specified in the header.
 * <p>
 * Zero or more records follow after the column headers. This can be a mixture
 * of records describing a result and supporting records. The results table is
 * described by the result records which are written from left to right, from
 * top to bottom. Each record starts with a record type marker (a single byte).
 * The following records are defined in the current format:
 * <ul>
 * <li>{@code NULL} (byte value: 0):<br>
 * This indicates a NULL value in the table and consists of nothing more than
 * the record type marker.</li>
 * <li>{@code REPEAT} (byte value: 1):<br>
 * This indicates that the next value is identical to the value in the same
 * column in the previous row. The REPEAT record consists of nothing more than
 * the record type marker.</li>
 * <li>{@code NAMESPACE} (byte value: 2):<br>
 * This is a supporting record that assigns an ID (non-negative integer) to a
 * namespace. This ID can later be used in a QNAME record to combine it with a
 * local name to form a full URI. The record type marker is followed by a
 * non-negative integer for the ID and a UTF-8 encoded string for the
 * namespace.</li>
 * <li>{@code QNAME} (byte value: 3):<br>
 * This indicates a URI value, the value of which is encoded as a namespace ID
 * and a local name. The namespace ID is required to be mapped to a namespace in
 * a previous NAMESPACE record. The record type marker is followed by a
 * non-negative integer (the namespace ID) and a UTF-8 encoded string for the
 * local name.</li>
 * <li>{@code URI} (byte value: 4):<br>
 * This also indicates a URI value, but one that does not use a namespace ID.
 * This record type marker is simply followed by a UTF-8 encoded string for the
 * full URI.</li>
 * <li>{@code BNODE} (byte value: 5):<br>
 * This indicates a blank node. The record type marker is followed by a UTF-8
 * encoded string for the bnode ID.</li>
 * <li>{@code PLAIN_LITERAL} (byte value: 6):<br>
 * This indicates a plain literal value. The record type marker is followed by
 * a UTF-8 encoded string for the literal's label.</li>
 * <li>{@code LANG_LITERAL} (byte value: 7):<br>
 * This indicates a literal value with a language attribute. The record type
 * marker is followed by a UTF-8 encoded string for the literal's label,
 * followed by a UTF-8 encoded string for the language attribute.</li>
 * <li>{@code DATATYPE_LITERAL} (byte value: 8):<br>
 * This indicates a datatyped literal. The record type marker is followed by a
 * UTF-8 encoded string for the literal's label. Following this label is either
 * a QNAME or URI record for the literal's datatype.</li>
 * <li>{@code ERROR} (byte value: 126):<br>
 * This record indicates an error. The type of error is indicated by the byte
 * directly following the record type marker: {@code 1} for a malformed query
 * error, {@code 2} for a query evaluation error. The error type byte is
 * followed by a UTF-8 string for the error message.</li>
 * <li>{@code TABLE_END} (byte value: 127):<br>
 * This is a special record that indicates the end of the results table and
 * consists of nothing more than the record type marker. Any data following this
 * record should be ignored.</li>
 * </ul>
 *
 * @author Arjohn Kampman
 */
public interface BinaryQueryResultConstants {

	/**
	 * Magic number for Binary RDF Table Result files: the ASCII codes for the
	 * string "BRTR".
	 * <p>
	 * Note: this is a shared array and Java arrays are always mutable, so
	 * callers must treat it as read-only and never write to its elements.
	 */
	byte[] MAGIC_NUMBER = { 'B', 'R', 'T', 'R' };

	/**
	 * The version number of the current format.
	 */
	int FORMAT_VERSION = 3;

	/* RECORD TYPES */

	/** Record type marker for a NULL value in the table. */
	int NULL_RECORD_MARKER = 0;

	/**
	 * Record type marker indicating that the value is identical to the value in
	 * the same column of the previous row.
	 */
	int REPEAT_RECORD_MARKER = 1;

	/** Record type marker for a supporting record that assigns an ID to a namespace. */
	int NAMESPACE_RECORD_MARKER = 2;

	/** Record type marker for a URI encoded as a namespace ID plus a local name. */
	int QNAME_RECORD_MARKER = 3;

	/** Record type marker for a URI encoded as a single full string. */
	int URI_RECORD_MARKER = 4;

	/** Record type marker for a blank node. */
	int BNODE_RECORD_MARKER = 5;

	/** Record type marker for a plain literal (label only). */
	int PLAIN_LITERAL_RECORD_MARKER = 6;

	/** Record type marker for a literal with a language attribute. */
	int LANG_LITERAL_RECORD_MARKER = 7;

	/** Record type marker for a datatyped literal. */
	int DATATYPE_LITERAL_RECORD_MARKER = 8;

	/** Record type marker for an error record. */
	int ERROR_RECORD_MARKER = 126;

	/** Record type marker signalling the end of the results table. */
	int TABLE_END_RECORD_MARKER = 127;

	/* ERROR TYPES */

	/** Error type, following an ERROR record marker, for a malformed query error. */
	int MALFORMED_QUERY_ERROR = 1;

	/** Error type, following an ERROR record marker, for a query evaluation error. */
	int QUERY_EVALUATION_ERROR = 2;
}
|