001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.rio.rdfxml.util;
007:
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.util.Stack;

import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.rdfxml.RDFXMLWriter;
024:
025: /**
026: * An extension of RDFXMLWriter that outputs a more concise form of RDF/XML. The
027: * resulting output is semantically equivalent to the output of an RDFXMLWriter
028: * (it produces the same set of statements), but it is usually easier to read
029: * for humans.
030: * <p>
031: * This is a quasi-streaming RDFWriter. Statements are cached as long as the
032: * striped syntax is followed (i.e. the subject of the next statement is the
033: * object of the previous statement) and written to the output when the stripe
034: * is broken.
035: * <p>
036: * The abbreviations used are <a
037: * href="http://www.w3.org/TR/rdf-syntax-grammar/#section-Syntax-typed-nodes">typed
038: * node elements</a>, <a
039: * href="http://www.w3.org/TR/rdf-syntax-grammar/#section-Syntax-empty-property-elements">empty
040: * property elements</a> and <a
041: * href="http://www.w3.org/TR/rdf-syntax-grammar/#section-Syntax-node-property-elements">striped
042: * syntax</a>. Note that these abbreviations require that statements are
043: * written in the appropriate order.
044: * <p>
045: * Striped syntax means that when the object of a statement is the subject of
046: * the next statement we can nest the descriptions in each other.
047: * <p>
048: * Example:
049: *
050: * <pre>
051: * <rdf:Seq>
052: * <rdf:li>
053: * <foaf:Person>
054: * <foaf:knows>
055: * <foaf:Person>
056: * <foaf:mbox rdf:resource="..."/>
057: * </foaf:Person>
058: * </foaf:knows>
059: * </foaf:Person>
060: * </rdf:li>
061: * </rdf:Seq>
062: * </pre>
063: *
064: * Typed node elements means that we write out type information in the short
065: * form of
066: *
067: * <pre>
068: * <foaf:Person rdf:about="...">
069: * ...
070: * </foaf:Person>
071: * </pre>
072: *
073: * instead of
074: *
075: * <pre>
076: * <rdf:Description rdf:about="...">
077: * <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>
078: * ...
079: * </rdf:Description>
080: * </pre>
081: *
082: * Empty property elements are of the form
083: *
084: * <pre>
085: * <foaf:Person>
086: * <foaf:homepage rdf:resource="http://www.cs.vu.nl/˜marta"/>
087: * </foaf:Person>
088: * </pre>
089: *
090: * instead of
091: *
092: * <pre>
093: * <foaf:Person>
094: * <foaf:homepage>
095: * <rdf:Description rdf:about="http://www.cs.vu.nl/˜marta"/>
096: * <foaf:homepage>
097: * </foaf:Person>
098: * </pre>
099: *
100: * @author Peter Mika (pmika@cs.vu.nl)
101: */
102: public class RDFXMLPrettyWriter extends RDFXMLWriter implements
103: Closeable, Flushable {
104:
105: /*-----------*
106: * Variables *
107: *-----------*/
108:
109: /*
110: * We implement striped syntax by using two stacks, one for predicates and
111: * one for subjects/objects.
112: */
113:
114: /**
115: * Stack for remembering the nodes (subjects/objects) of statements at each
116: * level.
117: */
118: private Stack<Node> nodeStack = new Stack<Node>();
119:
120: /**
121: * Stack for remembering the predicate of statements at each level.
122: */
123: private Stack<URI> predicateStack = new Stack<URI>();
124:
125: /*--------------*
126: * Constructors *
127: *--------------*/
128:
129: /**
130: * Creates a new RDFXMLPrintWriter that will write to the supplied
131: * OutputStream.
132: *
133: * @param out
134: * The OutputStream to write the RDF/XML document to.
135: */
136: public RDFXMLPrettyWriter(OutputStream out) {
137: super (out);
138: }
139:
140: /**
141: * Creates a new RDFXMLPrintWriter that will write to the supplied Writer.
142: *
143: * @param out
144: * The Writer to write the RDF/XML document to.
145: */
146: public RDFXMLPrettyWriter(Writer out) {
147: super (out);
148: }
149:
150: /*---------*
151: * Methods *
152: *---------*/
153:
154: @Override
155: protected void writeHeader() throws IOException {
156: // This export format needs the RDF Schema namespace to be defined:
157: setNamespace("rdfs", RDFS.NAMESPACE, false);
158:
159: super .writeHeader();
160: }
161:
162: public void flush() throws IOException {
163: if (writingStarted) {
164: if (!headerWritten) {
165: writeHeader();
166: }
167:
168: flushPendingStatements();
169:
170: writer.flush();
171: }
172: }
173:
174: public void close() throws IOException {
175: try {
176: if (writingStarted) {
177: endRDF();
178: }
179: } catch (RDFHandlerException e) {
180: if (e.getCause() instanceof IOException) {
181: throw (IOException) e.getCause();
182: } else {
183: IOException ioe = new IOException(e.getMessage());
184: ioe.initCause(e);
185: throw ioe;
186: }
187: } finally {
188: writer.close();
189: }
190: }
191:
192: @Override
193: protected void flushPendingStatements() throws IOException {
194: if (!nodeStack.isEmpty()) {
195: popStacks(null);
196: }
197: }
198:
199: /**
200: * Write out the stacks until we find subject. If subject == null, write out
201: * the entire stack
202: *
203: * @param newSubject
204: */
205: private void popStacks(Resource newSubject) throws IOException {
206: // Write start tags for the part of the stacks that are not yet
207: // written
208: for (int i = 0; i < nodeStack.size() - 1; i++) {
209: Node node = nodeStack.get(i);
210:
211: if (!node.isWritten()) {
212: if (i > 0) {
213: writeIndents(i * 2 - 1);
214:
215: URI predicate = predicateStack.get(i - 1);
216:
217: writeStartTag(predicate.getNamespace(), predicate
218: .getLocalName());
219: writeNewLine();
220: }
221:
222: writeIndents(i * 2);
223: writeNodeStartTag(node);
224: node.setIsWritten(true);
225: }
226: }
227:
228: // Write tags for the top subject
229: Node topNode = nodeStack.pop();
230:
231: if (predicateStack.isEmpty()) {
232: // write out an empty subject
233: writeIndents(nodeStack.size() * 2);
234: writeNodeEmptyTag(topNode);
235: writeNewLine();
236: } else {
237: URI topPredicate = predicateStack.pop();
238:
239: if (!topNode.hasType()) {
240: // we can use an abbreviated predicate
241: writeIndents(nodeStack.size() * 2 - 1);
242: writeAbbreviatedPredicate(topPredicate, topNode
243: .getValue());
244: } else {
245: // we cannot use an abbreviated predicate because the type needs to
246: // written out as well
247:
248: writeIndents(nodeStack.size() * 2 - 1);
249: writeStartTag(topPredicate.getNamespace(), topPredicate
250: .getLocalName());
251: writeNewLine();
252:
253: // write out an empty subject
254: writeIndents(nodeStack.size() * 2);
255: writeNodeEmptyTag(topNode);
256: writeNewLine();
257:
258: writeIndents(nodeStack.size() * 2 - 1);
259: writeEndTag(topPredicate.getNamespace(), topPredicate
260: .getLocalName());
261: writeNewLine();
262: }
263: }
264:
265: // Write out the end tags until we find the subject
266: while (!nodeStack.isEmpty()) {
267: Node nextElement = nodeStack.peek();
268:
269: if (nextElement.getValue().equals(newSubject)) {
270: break;
271: } else {
272: nodeStack.pop();
273:
274: // We have already written out the subject/object,
275: // but we still need to close the tag
276: writeIndents(predicateStack.size() + nodeStack.size());
277:
278: writeNodeEndTag(nextElement);
279:
280: if (predicateStack.size() > 0) {
281: URI nextPredicate = predicateStack.pop();
282:
283: writeIndents(predicateStack.size()
284: + nodeStack.size());
285:
286: writeEndTag(nextPredicate.getNamespace(),
287: nextPredicate.getLocalName());
288:
289: writeNewLine();
290: }
291: }
292: }
293: }
294:
295: @Override
296: public void handleStatement(Statement st)
297: throws RDFHandlerException {
298: if (!writingStarted) {
299: throw new RuntimeException(
300: "Document writing has not yet been started");
301: }
302:
303: Resource subj = st.getSubject();
304: URI pred = st.getPredicate();
305: Value obj = st.getObject();
306:
307: try {
308: if (!headerWritten) {
309: writeHeader();
310: }
311:
312: if (!nodeStack.isEmpty()
313: && !subj.equals(nodeStack.peek().getValue())) {
314: // Different subject than we had before, empty the stack
315: // until we find it
316: popStacks(subj);
317: }
318:
319: // Stack is either empty or contains the same subject at top
320:
321: if (nodeStack.isEmpty()) {
322: // Push subject
323: nodeStack.push(new Node(subj));
324: }
325:
326: // Stack now contains at least one element
327: Node topSubject = nodeStack.peek();
328:
329: // Check if current statement is a type statement and use a typed node
330: // element is possible
331: // FIXME: verify that an XML namespace-qualified name can be created
332: // for the type URI
333: if (pred.equals(RDF.TYPE) && obj instanceof URI
334: && !topSubject.hasType() && !topSubject.isWritten()) {
335: // Use typed node element
336: topSubject.setType((URI) obj);
337: } else {
338: // Push predicate and object
339: predicateStack.push(pred);
340: nodeStack.push(new Node(obj));
341: }
342: } catch (IOException e) {
343: throw new RDFHandlerException(e);
344: }
345: }
346:
347: /**
348: * Write out the opening tag of the subject or object of a statement up to
349: * (but not including) the end of the tag. Used both in writeStartSubject and
350: * writeEmptySubject.
351: */
352: private void writeNodeStartOfStartTag(Node node) throws IOException {
353: Value value = node.getValue();
354:
355: if (node.hasType()) {
356: // We can use abbreviated syntax
357: writeStartOfStartTag(node.getType().getNamespace(), node
358: .getType().getLocalName());
359: } else {
360: // We cannot use abbreviated syntax
361: writeStartOfStartTag(RDF.NAMESPACE, "Description");
362: }
363:
364: if (value instanceof URI) {
365: URI uri = (URI) value;
366: writeAttribute(RDF.NAMESPACE, "about", uri.toString());
367: }
368: // else {
369: // BNode bNode = (BNode)subj;
370: // writeAttribute(RDF.NAMESPACE, "nodeID", bNode.getID());
371: // }
372: }
373:
374: /**
375: * Write out the opening tag of the subject or object of a statement.
376: */
377: private void writeNodeStartTag(Node node) throws IOException {
378: writeNodeStartOfStartTag(node);
379: writeEndOfStartTag();
380: writeNewLine();
381: }
382:
383: /**
384: * Write out the closing tag for the subject or object of a statement.
385: */
386: private void writeNodeEndTag(Node node) throws IOException {
387: if (node.getType() != null) {
388: writeEndTag(node.getType().getNamespace(), node.getType()
389: .getLocalName());
390: } else {
391: writeEndTag(RDF.NAMESPACE, "Description");
392: }
393: writeNewLine();
394: }
395:
396: /**
397: * Write out an empty tag for the subject or object of a statement.
398: */
399: private void writeNodeEmptyTag(Node node) throws IOException {
400: writeNodeStartOfStartTag(node);
401: writeEndOfEmptyTag();
402: }
403:
404: /**
405: * Write out an empty property element.
406: */
407: private void writeAbbreviatedPredicate(URI pred, Value obj)
408: throws IOException {
409: writeStartOfStartTag(pred.getNamespace(), pred.getLocalName());
410:
411: if (obj instanceof Resource) {
412: Resource objRes = (Resource) obj;
413:
414: if (objRes instanceof URI) {
415: URI uri = (URI) objRes;
416: writeAttribute(RDF.NAMESPACE, "resource", uri
417: .toString());
418: }
419: // else {
420: // BNode bNode = (BNode)objRes;
421: // writeAttribute(RDF.NAMESPACE, "nodeID", bNode.getID());
422: // }
423:
424: writeEndOfEmptyTag();
425: } else if (obj instanceof Literal) {
426: Literal objLit = (Literal) obj;
427:
428: // language attribute
429: if (objLit.getLanguage() != null) {
430: writeAttribute("xml:lang", objLit.getLanguage());
431: }
432:
433: // datatype attribute
434: boolean isXmlLiteral = false;
435: URI datatype = objLit.getDatatype();
436: if (datatype != null) {
437: // Check if datatype is rdf:XMLLiteral
438: isXmlLiteral = datatype.equals(RDF.XMLLITERAL);
439:
440: if (isXmlLiteral) {
441: writeAttribute(RDF.NAMESPACE, "parseType",
442: "Literal");
443: } else {
444: writeAttribute(RDF.NAMESPACE, "datatype", datatype
445: .toString());
446: }
447: }
448:
449: writeEndOfStartTag();
450:
451: // label
452: if (isXmlLiteral) {
453: // Write XML literal as plain XML
454: writer.write(objLit.getLabel());
455: } else {
456: writeCharacterData(objLit.getLabel());
457: }
458:
459: writeEndTag(pred.getNamespace(), pred.getLocalName());
460: }
461:
462: writeNewLine();
463: }
464:
465: protected void writeStartTag(String namespace, String localName)
466: throws IOException {
467: writeStartOfStartTag(namespace, localName);
468: writeEndOfStartTag();
469: }
470:
471: /**
472: * Writes <tt>n</tt> indents.
473: */
474: protected void writeIndents(int n) throws IOException {
475: for (int i = 0; i < n; i++) {
476: writeIndent();
477: }
478: }
479:
480: /*------------------*
481: * Inner class Node *
482: *------------------*/
483:
484: private static class Node {
485:
486: private Value value;
487:
488: // type == null means that we use <rdf:Description>
489: private URI type = null;
490:
491: private boolean isWritten = false;
492:
493: /**
494: * Creates a new Node for the supplied Value.
495: */
496: public Node(Value value) {
497: this .value = value;
498: }
499:
500: public Value getValue() {
501: return value;
502: }
503:
504: public void setType(URI type) {
505: this .type = type;
506: }
507:
508: public URI getType() {
509: return type;
510: }
511:
512: public boolean hasType() {
513: return type != null;
514: }
515:
516: public void setIsWritten(boolean isWritten) {
517: this .isWritten = isWritten;
518: }
519:
520: public boolean isWritten() {
521: return isWritten;
522: }
523: }
524: }
|