001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.rio.turtle;
007:
008: import java.io.IOException;
009: import java.io.OutputStream;
010: import java.io.OutputStreamWriter;
011: import java.io.Writer;
012: import java.nio.charset.Charset;
013: import java.util.LinkedHashMap;
014: import java.util.Map;
015: import java.util.StringTokenizer;
016:
017: import info.aduna.io.IndentingWriter;
018:
019: import org.openrdf.model.BNode;
020: import org.openrdf.model.Literal;
021: import org.openrdf.model.Resource;
022: import org.openrdf.model.Statement;
023: import org.openrdf.model.URI;
024: import org.openrdf.model.Value;
025: import org.openrdf.model.vocabulary.RDF;
026: import org.openrdf.rio.RDFFormat;
027: import org.openrdf.rio.RDFHandlerException;
028: import org.openrdf.rio.RDFWriter;
029:
030: /**
031: * An implementation of the RDFWriter interface that writes RDF documents in
032: * Turtle format. The Turtle format is defined in <a
033: * href="http://www.dajobe.org/2004/01/turtle/">in this document</a>.
034: */
035: public class TurtleWriter implements RDFWriter {
036:
037: /*-----------*
038: * Variables *
039: *-----------*/
040:
041: protected IndentingWriter writer;
042:
043: /**
044: * Table mapping namespace names (key) to namespace prefixes (value).
045: */
046: protected Map<String, String> namespaceTable;
047:
048: protected boolean writingStarted;
049:
050: /**
051: * Flag indicating whether the last written statement has been closed.
052: */
053: protected boolean statementClosed;
054:
055: protected Resource lastWrittenSubject;
056:
057: protected URI lastWrittenPredicate;
058:
059: /*--------------*
060: * Constructors *
061: *--------------*/
062:
063: /**
064: * Creates a new TurtleWriter that will write to the supplied OutputStream.
065: *
066: * @param out
067: * The OutputStream to write the Turtle document to.
068: */
069: public TurtleWriter(OutputStream out) {
070: this (new OutputStreamWriter(out, Charset.forName("UTF-8")));
071: }
072:
073: /**
074: * Creates a new TurtleWriter that will write to the supplied Writer.
075: *
076: * @param writer
077: * The Writer to write the Turtle document to.
078: */
079: public TurtleWriter(Writer writer) {
080: this .writer = new IndentingWriter(writer);
081: namespaceTable = new LinkedHashMap<String, String>();
082: writingStarted = false;
083: statementClosed = true;
084: lastWrittenSubject = null;
085: lastWrittenPredicate = null;
086: }
087:
088: /*---------*
089: * Methods *
090: *---------*/
091:
092: public RDFFormat getRDFFormat() {
093: return RDFFormat.TURTLE;
094: }
095:
096: public void startRDF() throws RDFHandlerException {
097: if (writingStarted) {
098: throw new RuntimeException(
099: "Document writing has already started");
100: }
101:
102: writingStarted = true;
103:
104: try {
105: // Write namespace declarations
106: for (Map.Entry<String, String> entry : namespaceTable
107: .entrySet()) {
108: String name = entry.getKey();
109: String prefix = entry.getValue();
110:
111: writeNamespace(prefix, name);
112: }
113:
114: if (!namespaceTable.isEmpty()) {
115: writer.writeEOL();
116: }
117: } catch (IOException e) {
118: throw new RDFHandlerException(e);
119: }
120: }
121:
122: public void endRDF() throws RDFHandlerException {
123: if (!writingStarted) {
124: throw new RuntimeException(
125: "Document writing has not yet started");
126: }
127:
128: try {
129: closePreviousStatement();
130: writer.flush();
131: } catch (IOException e) {
132: throw new RDFHandlerException(e);
133: } finally {
134: writingStarted = false;
135: }
136: }
137:
138: public void handleNamespace(String prefix, String name)
139: throws RDFHandlerException {
140: try {
141: if (!namespaceTable.containsKey(name)) {
142: // Namespace not yet mapped to a prefix, try to give it the
143: // specified prefix
144:
145: boolean isLegalPrefix = prefix.length() == 0
146: || TurtleUtil.isLegalPrefix(prefix);
147:
148: if (!isLegalPrefix
149: || namespaceTable.containsValue(prefix)) {
150: // Specified prefix is not legal or the prefix is already in use,
151: // generate a legal unique prefix
152:
153: if (prefix.length() == 0 || !isLegalPrefix) {
154: prefix = "ns";
155: }
156:
157: int number = 1;
158:
159: while (namespaceTable
160: .containsValue(prefix + number)) {
161: number++;
162: }
163:
164: prefix += number;
165: }
166:
167: namespaceTable.put(name, prefix);
168:
169: if (writingStarted) {
170: closePreviousStatement();
171:
172: writeNamespace(prefix, name);
173: }
174: }
175: } catch (IOException e) {
176: throw new RDFHandlerException(e);
177: }
178: }
179:
180: public void handleStatement(Statement st)
181: throws RDFHandlerException {
182: if (!writingStarted) {
183: throw new RuntimeException(
184: "Document writing has not yet been started");
185: }
186:
187: Resource subj = st.getSubject();
188: URI pred = st.getPredicate();
189: Value obj = st.getObject();
190:
191: try {
192: if (subj.equals(lastWrittenSubject)) {
193: if (pred.equals(lastWrittenPredicate)) {
194: // Identical subject and predicate
195: writer.write(" , ");
196: } else {
197: // Identical subject, new predicate
198: writer.write(" ;");
199: writer.writeEOL();
200:
201: // Write new predicate
202: writePredicate(pred);
203: writer.write(" ");
204: lastWrittenPredicate = pred;
205: }
206: } else {
207: // New subject
208: closePreviousStatement();
209:
210: // Write new subject:
211: writer.writeEOL();
212: writeResource(subj);
213: writer.write(" ");
214: lastWrittenSubject = subj;
215:
216: // Write new predicate
217: writePredicate(pred);
218: writer.write(" ");
219: lastWrittenPredicate = pred;
220:
221: statementClosed = false;
222: writer.increaseIndentation();
223: }
224:
225: writeValue(obj);
226:
227: // Don't close the line just yet. Maybe the next
228: // statement has the same subject and/or predicate.
229: } catch (IOException e) {
230: throw new RDFHandlerException(e);
231: }
232: }
233:
234: public void handleComment(String comment)
235: throws RDFHandlerException {
236: try {
237: closePreviousStatement();
238:
239: if (comment.indexOf('\r') != -1
240: || comment.indexOf('\n') != -1) {
241: // Comment is not allowed to contain newlines or line feeds.
242: // Split comment in individual lines and write comment lines
243: // for each of them.
244: StringTokenizer st = new StringTokenizer(comment,
245: "\r\n");
246: while (st.hasMoreTokens()) {
247: writeCommentLine(st.nextToken());
248: }
249: } else {
250: writeCommentLine(comment);
251: }
252: } catch (IOException e) {
253: throw new RDFHandlerException(e);
254: }
255: }
256:
257: protected void writeCommentLine(String line) throws IOException {
258: writer.write("# ");
259: writer.write(line);
260: writer.writeEOL();
261: }
262:
263: protected void writeNamespace(String prefix, String name)
264: throws IOException {
265: writer.write("@prefix ");
266: writer.write(prefix);
267: writer.write(": <");
268: writer.write(TurtleUtil.encodeURIString(name));
269: writer.write("> .");
270: writer.writeEOL();
271: }
272:
273: protected void writePredicate(URI predicate) throws IOException {
274: if (predicate.equals(RDF.TYPE)) {
275: // Write short-cut for rdf:type
276: writer.write("a");
277: } else {
278: writeURI(predicate);
279: }
280: }
281:
282: protected void writeValue(Value val) throws IOException {
283: if (val instanceof Resource) {
284: writeResource((Resource) val);
285: } else {
286: writeLiteral((Literal) val);
287: }
288: }
289:
290: protected void writeResource(Resource res) throws IOException {
291: if (res instanceof URI) {
292: writeURI((URI) res);
293: } else {
294: writeBNode((BNode) res);
295: }
296: }
297:
298: protected void writeURI(URI uri) throws IOException {
299: String uriString = uri.toString();
300:
301: // Try to find a prefix for the URI's namespace
302: String prefix = null;
303:
304: int splitIdx = TurtleUtil.findURISplitIndex(uriString);
305: if (splitIdx > 0) {
306: String namespace = uriString.substring(0, splitIdx);
307: prefix = namespaceTable.get(namespace);
308: }
309:
310: if (prefix != null) {
311: // Namespace is mapped to a prefix; write abbreviated URI
312: writer.write(prefix);
313: writer.write(":");
314: writer.write(uriString.substring(splitIdx));
315: } else {
316: // Write full URI
317: writer.write("<");
318: writer.write(TurtleUtil.encodeURIString(uriString));
319: writer.write(">");
320: }
321: }
322:
323: protected void writeBNode(BNode bNode) throws IOException {
324: writer.write("_:");
325: writer.write(bNode.getID());
326: }
327:
328: protected void writeLiteral(Literal lit) throws IOException {
329: String label = lit.getLabel();
330:
331: if (label.indexOf('\n') > 0 || label.indexOf('\r') > 0
332: || label.indexOf('\t') > 0) {
333: // Write label as long string
334: writer.write("\"\"\"");
335: writer.write(TurtleUtil.encodeLongString(label));
336: writer.write("\"\"\"");
337: } else {
338: // Write label as normal string
339: writer.write("\"");
340: writer.write(TurtleUtil.encodeString(label));
341: writer.write("\"");
342: }
343:
344: if (lit.getDatatype() != null) {
345: // Append the literal's datatype (possibly written as an abbreviated
346: // URI)
347: writer.write("^^");
348: writeURI(lit.getDatatype());
349: } else if (lit.getLanguage() != null) {
350: // Append the literal's language
351: writer.write("@");
352: writer.write(lit.getLanguage());
353: }
354: }
355:
356: protected void closePreviousStatement() throws IOException {
357: if (!statementClosed) {
358: // The previous statement still needs to be closed:
359: writer.write(" .");
360: writer.writeEOL();
361: writer.decreaseIndentation();
362:
363: statementClosed = true;
364: lastWrittenSubject = null;
365: lastWrittenPredicate = null;
366: }
367: }
368: }
|