001: /*
002: * (c) Copyright 2001, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions
007: * are met:
008: * 1. Redistributions of source code must retain the above copyright
009: * notice, this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright
011: * notice, this list of conditions and the following disclaimer in the
012: * documentation and/or other materials provided with the distribution.
013: * 3. The name of the author may not be used to endorse or promote products
014: * derived from this software without specific prior written permission.
015:
016: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
017: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
018: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
019: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
020: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
021: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
022: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
023: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
024: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
025: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
026: *
027: * $Id: NTripleReader.java,v 1.17 2008/01/02 12:05:04 andy_seaborne Exp $
028: */
029:
030: package com.hp.hpl.jena.rdf.model.impl;
031:
032: import org.apache.commons.logging.Log;
033: import org.apache.commons.logging.LogFactory;
034:
035: import com.hp.hpl.jena.graph.GraphEvents;
036: import com.hp.hpl.jena.rdf.model.*;
037: import com.hp.hpl.jena.util.FileUtils;
038: import com.hp.hpl.jena.shared.*;
039:
040: import java.net.URL;
041: import java.io.*;
042: import java.util.*;
043:
044: /** N-Triple Reader
045: *
046: * @author Brian McBride, Jeremy Carroll, Dave Banks
047: * @version Release=$Name: $ Date=$Date: 2008/01/02 12:05:04 $
048: */
049: public class NTripleReader extends Object implements RDFReader {
050: static final Log log = LogFactory.getLog(NTripleReader.class);
051:
052: private Model model = null;
053: private Hashtable anons = new Hashtable();
054:
055: private IStream in = null;
056: private boolean inErr = false;
057: private int errCount = 0;
058: private static final int sbLength = 200;
059:
060: private RDFErrorHandler errorHandler = new RDFDefaultErrorHandler();
061:
062: /**
063: * Already with ": " at end for error messages.
064: */
065: private String base;
066:
067: NTripleReader() {
068: }
069:
070: public void read(Model model, InputStream in, String base) {
071: // N-Triples must be in ASCII, we permit UTF-8.
072: read(model, FileUtils.asUTF8(in), base);
073: }
074:
075: public void read(Model model, Reader reader, String base) {
076:
077: if (!(reader instanceof BufferedReader)) {
078: reader = new BufferedReader(reader);
079: }
080:
081: this .model = model;
082: this .base = base == null ? "" : (base + ": ");
083: in = new IStream(reader);
084: readRDF();
085: if (errCount != 0) {
086: throw new SyntaxError("unknown");
087: }
088: }
089:
090: public void read(Model model, String url) {
091: try {
092: read(model, new InputStreamReader(((new URL(url)))
093: .openStream()), url);
094: } catch (Exception e) {
095: throw new JenaException(e);
096: } finally {
097: if (errCount != 0) {
098: throw new SyntaxError("unknown");
099: }
100: }
101: }
102:
103: public Object setProperty(String propName, Object propValue) {
104: errorHandler.error(new UnknownPropertyException(propName));
105: return null;
106: }
107:
108: public RDFErrorHandler setErrorHandler(RDFErrorHandler errHandler) {
109: RDFErrorHandler old = this .errorHandler;
110: this .errorHandler = errHandler;
111: return old;
112: }
113:
114: protected void readRDF() {
115: try {
116: model.notifyEvent(GraphEvents.startRead);
117: unwrappedReadRDF();
118: } finally {
119: model.notifyEvent(GraphEvents.finishRead);
120: }
121: }
122:
123: protected final void unwrappedReadRDF() {
124: Resource subject;
125: Property predicate = null;
126: RDFNode object;
127:
128: while (!in.eof()) {
129: while (!in.eof()) {
130: inErr = false;
131:
132: skipWhiteSpace();
133: if (in.eof()) {
134: return;
135: }
136:
137: subject = readResource();
138: if (inErr)
139: break;
140:
141: skipWhiteSpace();
142: try {
143: predicate = model.createProperty(readResource()
144: .getURI());
145: } catch (Exception e1) {
146: errorHandler.fatalError(e1);
147: }
148: if (inErr)
149: break;
150:
151: skipWhiteSpace();
152: object = readNode();
153: if (inErr)
154: break;
155:
156: skipWhiteSpace();
157: if (badEOF())
158: break;
159:
160: if (!expect("."))
161: break;
162:
163: try {
164: model.add(subject, predicate, object);
165: } catch (Exception e2) {
166: errorHandler.fatalError(e2);
167: }
168: }
169: if (inErr) {
170: errCount++;
171: while (!in.eof() && in.readChar() != '\n') {
172: }
173: }
174: }
175: }
176:
177: public Resource readResource() {
178: char inChar = in.readChar();
179: if (badEOF())
180: return null;
181:
182: if (inChar == '_') { // anon resource
183: if (!expect(":"))
184: return null;
185: String name = readName();
186: if (name == null) {
187: syntaxError("expected bNode label");
188: return null;
189: }
190: return lookupResource(name);
191: } else if (inChar == '<') { // uri
192: String uri = readURI();
193: if (uri == null) {
194: inErr = true;
195: return null;
196: }
197: inChar = in.readChar();
198: if (inChar != '>') {
199: syntaxError("expected '>'");
200: return null;
201: }
202: return model.createResource(uri);
203: } else {
204: syntaxError("unexpected input");
205: return null;
206: }
207: }
208:
209: public RDFNode readNode() {
210: skipWhiteSpace();
211: switch (in.nextChar()) {
212: case '"':
213: return readLiteral(false);
214: case 'x':
215: return readLiteral(true);
216: case '<':
217: case '_':
218: return readResource();
219: default:
220: syntaxError("unexpected input");
221: return null;
222: }
223: }
224:
225: protected Literal readLiteral(boolean wellFormed) {
226:
227: StringBuffer lit = new StringBuffer(sbLength);
228:
229: if (wellFormed) {
230: deprecated("Use ^^rdf:XMLLiteral not xml\"literals\", .");
231:
232: if (!expect("xml"))
233: return null;
234: }
235:
236: if (!expect("\""))
237: return null;
238:
239: while (true) {
240: char inChar = in.readChar();
241: if (badEOF())
242: return null;
243: if (inChar == '\\') {
244: char c = in.readChar();
245: if (in.eof()) {
246: inErr = true;
247: return null;
248: }
249: if (c == 'n') {
250: inChar = '\n';
251: } else if (c == 'r') {
252: inChar = '\r';
253: } else if (c == 't') {
254: inChar = '\t';
255: } else if (c == '\\' || c == '"') {
256: inChar = c;
257: } else if (c == 'u') {
258: inChar = readUnicode4Escape();
259: if (inErr)
260: return null;
261: } else {
262: syntaxError("illegal escape sequence '" + c + "'");
263: return null;
264: }
265: } else if (inChar == '"') {
266: String lang;
267: if ('@' == in.nextChar()) {
268: expect("@");
269: lang = readLang();
270: } else if ('-' == in.nextChar()) {
271: expect("-");
272: deprecated("Language tags should be introduced with @ not -.");
273: lang = readLang();
274: } else {
275: lang = "";
276: }
277: if (wellFormed) {
278: return model.createLiteral(lit.toString(),
279: // "",
280: wellFormed);
281: } else if ('^' == in.nextChar()) {
282: String datatypeURI = null;
283: if (!expect("^^<")) {
284: syntaxError("ill-formed datatype");
285: return null;
286: }
287: datatypeURI = readURI();
288: if (datatypeURI == null || !expect(">"))
289: return null;
290: if (lang.length() > 0)
291: deprecated("Language tags are not permitted on typed literals.");
292:
293: return model.createTypedLiteral(lit.toString(),
294: datatypeURI);
295: } else {
296: return model.createLiteral(lit.toString(), lang);
297: }
298: }
299: lit = lit.append(inChar);
300: }
301: }
302:
303: private char readUnicode4Escape() {
304: char buf[] = new char[] { in.readChar(), in.readChar(),
305: in.readChar(), in.readChar() };
306: if (badEOF()) {
307: return 0;
308: }
309: try {
310: return (char) Integer.parseInt(new String(buf), 16);
311: } catch (NumberFormatException e) {
312: syntaxError("bad unicode escape sequence");
313: return 0;
314: }
315: }
316:
317: private void deprecated(String s) {
318: errorHandler.warning(new SyntaxError(syntaxErrorMessage(
319: "Deprecation warning", s, in.getLinepos(), in
320: .getCharpos())));
321: }
322:
323: private void syntaxError(String s) {
324: errorHandler.error(new SyntaxError(syntaxErrorMessage(
325: "Syntax error", s, in.getLinepos(), in.getCharpos())));
326: inErr = true;
327: }
328:
329: private String readLang() {
330: StringBuffer lang = new StringBuffer(15);
331:
332: while (true) {
333: char inChar = in.nextChar();
334: if (Character.isWhitespace(inChar) || inChar == '.'
335: || inChar == '^')
336: return lang.toString();
337: lang = lang.append(in.readChar());
338: }
339: }
340:
341: private boolean badEOF() {
342: if (in.eof()) {
343: syntaxError("premature end of file");
344: }
345: return inErr;
346: }
347:
348: protected String readURI() {
349: StringBuffer uri = new StringBuffer(sbLength);
350:
351: while (in.nextChar() != '>') {
352: char inChar = in.readChar();
353:
354: if (inChar == '\\') {
355: expect("u");
356: inChar = readUnicode4Escape();
357: }
358: if (badEOF()) {
359: return null;
360: }
361: uri = uri.append(inChar);
362: }
363: return uri.toString();
364: }
365:
366: protected String readName() {
367: StringBuffer name = new StringBuffer(sbLength);
368:
369: while (!Character.isWhitespace(in.nextChar())) {
370: name = name.append(in.readChar());
371: if (badEOF())
372: return null;
373: }
374: return name.toString();
375: }
376:
377: private boolean expect(String str) {
378: for (int i = 0; i < str.length(); i++) {
379: char want = str.charAt(i);
380:
381: if (badEOF())
382: return false;
383:
384: char inChar = in.readChar();
385:
386: if (inChar != want) {
387: //System.err.println("N-triple reader error");
388: syntaxError("expected \"" + str + "\"");
389: return false;
390: }
391: }
392: return true;
393: }
394:
395: protected void skipWhiteSpace() {
396: while (Character.isWhitespace(in.nextChar())
397: || in.nextChar() == '#') {
398: char inChar = in.readChar();
399: if (in.eof()) {
400: return;
401: }
402: if (inChar == '#') {
403: while (inChar != '\n') {
404: inChar = in.readChar();
405: if (in.eof()) {
406: return;
407: }
408: }
409: }
410: }
411: }
412:
413: protected Resource lookupResource(String name) {
414: Resource r;
415: r = (Resource) anons.get(name);
416: if (r == null) {
417: r = model.createResource();
418: anons.put(name, r);
419: }
420: return r;
421: }
422:
423: protected String syntaxErrorMessage(String sort, String msg,
424: int linepos, int charpos) {
425: return base + sort + " at line " + linepos + " position "
426: + charpos + ": " + msg;
427: }
428:
429: }
430:
431: class IStream {
432:
433: // simple input stream handler
434:
435: Reader in;
436: char[] this Char = new char[1];
437: boolean eof;
438: int charpos = 1;
439: int linepos = 1;
440:
441: protected IStream(Reader in) {
442: try {
443: this .in = in;
444: eof = (in.read(this Char, 0, 1) == -1);
445: } catch (IOException e) {
446: throw new JenaException(e);
447: }
448: }
449:
450: protected char readChar() {
451: try {
452: if (eof)
453: return '\000';
454: char rv = this Char[0];
455: eof = (in.read(this Char, 0, 1) == -1);
456: if (rv == '\n') {
457: linepos++;
458: charpos = 0;
459: } else {
460: charpos++;
461: }
462: return rv;
463: } catch (java.io.IOException e) {
464: throw new JenaException(e);
465: }
466: }
467:
468: protected char nextChar() {
469: return eof ? '\000' : this Char[0];
470: }
471:
472: protected boolean eof() {
473: return eof;
474: }
475:
476: protected int getLinepos() {
477: return linepos;
478: }
479:
480: protected int getCharpos() {
481: return charpos;
482: }
483:
484: }
|