001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2006.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.query.resultio.binary;
007:
008: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.BNODE_RECORD_MARKER;
009: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.DATATYPE_LITERAL_RECORD_MARKER;
010: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.ERROR_RECORD_MARKER;
011: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.FORMAT_VERSION;
012: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.LANG_LITERAL_RECORD_MARKER;
013: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.MAGIC_NUMBER;
014: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.MALFORMED_QUERY_ERROR;
015: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.NAMESPACE_RECORD_MARKER;
016: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.NULL_RECORD_MARKER;
017: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.PLAIN_LITERAL_RECORD_MARKER;
018: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.QNAME_RECORD_MARKER;
019: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.QUERY_EVALUATION_ERROR;
020: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.REPEAT_RECORD_MARKER;
021: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.TABLE_END_RECORD_MARKER;
022: import static org.openrdf.query.resultio.binary.BinaryQueryResultConstants.URI_RECORD_MARKER;
023:
024: import java.io.DataInputStream;
025: import java.io.EOFException;
026: import java.io.IOException;
027: import java.io.InputStream;
028: import java.nio.ByteBuffer;
029: import java.nio.CharBuffer;
030: import java.nio.charset.Charset;
031: import java.nio.charset.CharsetDecoder;
032: import java.util.ArrayList;
033: import java.util.Arrays;
034: import java.util.Collections;
035: import java.util.List;
036:
037: import info.aduna.io.IOUtil;
038:
039: import org.openrdf.model.BNode;
040: import org.openrdf.model.Literal;
041: import org.openrdf.model.URI;
042: import org.openrdf.model.Value;
043: import org.openrdf.model.ValueFactory;
044: import org.openrdf.model.impl.ValueFactoryImpl;
045: import org.openrdf.query.TupleQueryResultHandlerException;
046: import org.openrdf.query.impl.ListBindingSet;
047: import org.openrdf.query.resultio.QueryResultParseException;
048: import org.openrdf.query.resultio.TupleQueryResultFormat;
049: import org.openrdf.query.resultio.TupleQueryResultParserBase;
050:
051: /**
052: * Reader for the binary tuple result format. The format is explained in
053: * {@link BinaryQueryResultConstants}.
054: */
055: public class BinaryQueryResultParser extends TupleQueryResultParserBase {
056:
057: /*-----------*
058: * Variables *
059: *-----------*/
060:
061: private DataInputStream in;
062:
063: private int formatVersion;
064:
065: private CharsetDecoder charsetDecoder = Charset.forName("UTF-8")
066: .newDecoder();
067:
068: private String[] namespaceArray = new String[32];
069:
070: /*--------------*
071: * Constructors *
072: *--------------*/
073:
074: /**
075: * Creates a new parser for the binary query result format that will use an
076: * instance of {@link ValueFactoryImpl} to create Value objects.
077: */
078: public BinaryQueryResultParser() {
079: super ();
080: }
081:
082: /**
083: * Creates a new parser for the binary query result format that will use the
084: * supplied ValueFactory to create Value objects.
085: */
086: public BinaryQueryResultParser(ValueFactory valueFactory) {
087: super (valueFactory);
088: }
089:
090: /*---------*
091: * Methods *
092: *---------*/
093:
094: public final TupleQueryResultFormat getTupleQueryResultFormat() {
095: return TupleQueryResultFormat.BINARY;
096: }
097:
098: public synchronized void parse(InputStream in) throws IOException,
099: QueryResultParseException, TupleQueryResultHandlerException {
100: if (in == null) {
101: throw new IllegalArgumentException(
102: "Input stream can not be 'null'");
103: }
104: if (handler == null) {
105: throw new IllegalArgumentException(
106: "listener can not be 'null'");
107: }
108:
109: this .in = new DataInputStream(in);
110:
111: // Check magic number
112: byte[] magicNumber = IOUtil.readBytes(in, MAGIC_NUMBER.length);
113: if (!Arrays.equals(magicNumber, MAGIC_NUMBER)) {
114: throw new QueryResultParseException(
115: "File does not contain a binary RDF table result");
116: }
117:
118: // Check format version (parser is backward-compatible with version 1 and version 2)
119: formatVersion = this .in.readInt();
120: if (formatVersion != FORMAT_VERSION && formatVersion != 1
121: && formatVersion != 2) {
122: throw new QueryResultParseException(
123: "Incompatible format version: " + formatVersion);
124: }
125:
126: if (formatVersion == 2) {
127: // read format version 2 FLAG byte (ordered and distinct flags) and ignore them
128: this .in.readByte();
129: }
130:
131: // Read column headers
132: int columnCount = this .in.readInt();
133: if (columnCount < 1) {
134: throw new QueryResultParseException(
135: "Illegal column count specified: " + columnCount);
136: }
137:
138: List<String> columnHeaders = new ArrayList<String>(columnCount);
139: for (int i = 0; i < columnCount; i++) {
140: columnHeaders.add(readString());
141: }
142: columnHeaders = Collections.unmodifiableList(columnHeaders);
143:
144: handler.startQueryResult(columnHeaders);
145:
146: // Read value tuples
147: List<Value> currentTuple = new ArrayList<Value>(columnCount);
148: List<Value> previousTuple = Collections.nCopies(columnCount,
149: (Value) null);
150:
151: int recordTypeMarker = this .in.readByte();
152:
153: while (recordTypeMarker != TABLE_END_RECORD_MARKER) {
154: if (recordTypeMarker == ERROR_RECORD_MARKER) {
155: processError();
156: } else if (recordTypeMarker == NAMESPACE_RECORD_MARKER) {
157: processNamespace();
158: } else {
159: Value value = null;
160: switch (recordTypeMarker) {
161: case NULL_RECORD_MARKER:
162: break; // do nothing
163: case REPEAT_RECORD_MARKER:
164: value = previousTuple.get(currentTuple.size());
165: break;
166: case QNAME_RECORD_MARKER:
167: value = readQName();
168: break;
169: case URI_RECORD_MARKER:
170: value = readURI();
171: break;
172: case BNODE_RECORD_MARKER:
173: value = readBnode();
174: break;
175: case PLAIN_LITERAL_RECORD_MARKER:
176: case LANG_LITERAL_RECORD_MARKER:
177: case DATATYPE_LITERAL_RECORD_MARKER:
178: value = readLiteral(recordTypeMarker);
179: break;
180: default:
181: throw new IOException("Unkown record type: "
182: + recordTypeMarker);
183: }
184:
185: currentTuple.add(value);
186:
187: if (currentTuple.size() == columnCount) {
188: previousTuple = Collections
189: .unmodifiableList(currentTuple);
190: currentTuple = new ArrayList<Value>(columnCount);
191:
192: handler.handleSolution(new ListBindingSet(
193: columnHeaders, previousTuple));
194: }
195: }
196:
197: recordTypeMarker = this .in.readByte();
198: }
199:
200: handler.endQueryResult();
201: }
202:
203: private void processError() throws IOException,
204: QueryResultParseException {
205: byte errTypeFlag = in.readByte();
206:
207: QueryErrorType errType = null;
208: if (errTypeFlag == MALFORMED_QUERY_ERROR) {
209: errType = QueryErrorType.MALFORMED_QUERY_ERROR;
210: } else if (errTypeFlag == QUERY_EVALUATION_ERROR) {
211: errType = QueryErrorType.QUERY_EVALUATION_ERROR;
212: } else {
213: throw new QueryResultParseException("Unkown error type: "
214: + errTypeFlag);
215: }
216:
217: String msg = readString();
218:
219: // FIXME: is this the right thing to do upon encountering an error?
220: throw new QueryResultParseException(errType + ": " + msg);
221: }
222:
223: private void processNamespace() throws IOException {
224: int namespaceID = in.readInt();
225: String namespace = readString();
226:
227: if (namespaceID >= namespaceArray.length) {
228: int newSize = Math.max(namespaceID,
229: namespaceArray.length * 2);
230: String[] newArray = new String[newSize];
231: System.arraycopy(namespaceArray, 0, newArray, 0,
232: namespaceArray.length);
233: namespaceArray = newArray;
234: }
235:
236: namespaceArray[namespaceID] = namespace;
237: }
238:
239: private URI readQName() throws IOException {
240: int nsID = in.readInt();
241: String localName = readString();
242:
243: return valueFactory.createURI(namespaceArray[nsID], localName);
244: }
245:
246: private URI readURI() throws IOException {
247: String uri = readString();
248:
249: return valueFactory.createURI(uri);
250: }
251:
252: private BNode readBnode() throws IOException {
253: String bnodeID = readString();
254: return valueFactory.createBNode(bnodeID);
255: }
256:
257: private Literal readLiteral(int recordTypeMarker)
258: throws IOException, QueryResultParseException {
259: String label = readString();
260:
261: if (recordTypeMarker == DATATYPE_LITERAL_RECORD_MARKER) {
262: URI datatype = null;
263:
264: int dtTypeMarker = in.readByte();
265: switch (dtTypeMarker) {
266: case QNAME_RECORD_MARKER:
267: datatype = readQName();
268: break;
269: case URI_RECORD_MARKER:
270: datatype = readURI();
271: break;
272: default:
273: throw new QueryResultParseException(
274: "Illegal record type marker for literal's datatype");
275: }
276:
277: return valueFactory.createLiteral(label, datatype);
278: } else if (recordTypeMarker == LANG_LITERAL_RECORD_MARKER) {
279: String language = readString();
280: return valueFactory.createLiteral(label, language);
281: } else {
282: return valueFactory.createLiteral(label);
283: }
284: }
285:
286: private String readString() throws IOException {
287: if (formatVersion == 1) {
288: return readStringV1();
289: } else {
290: return readStringV2();
291: }
292: }
293:
294: /**
295: * Reads a string from the version 1 format, i.e. in Java's
296: * {@link DataInput#modified-utf-8 Modified UTF-8}.
297: */
298: private String readStringV1() throws IOException {
299: return in.readUTF();
300: }
301:
302: /**
303: * Reads a string from the version 2 format. Strings are encoded as UTF-8 and
304: * are preceeded by a 32-bit integer (high byte first) specifying the length
305: * of the encoded string.
306: */
307: private String readStringV2() throws IOException {
308: int stringLength = in.readInt();
309: byte[] encodedString = IOUtil.readBytes(in, stringLength);
310:
311: if (encodedString.length != stringLength) {
312: throw new EOFException("Attempted to read " + stringLength
313: + " bytes but no more than " + encodedString.length
314: + " were available");
315: }
316:
317: ByteBuffer byteBuf = ByteBuffer.wrap(encodedString);
318: CharBuffer charBuf = charsetDecoder.decode(byteBuf);
319:
320: return charBuf.toString();
321: }
322: }
|