001: /*
002: * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * [See end of file]
004: */
005:
006: package com.hp.hpl.jena.n3;
007:
008: //import org.apache.commons.logging.*;
009: import com.hp.hpl.jena.rdf.model.*;
010: import com.hp.hpl.jena.shared.JenaException;
011: import com.hp.hpl.jena.vocabulary.RDF;
012: import com.hp.hpl.jena.vocabulary.RDFS;
013: import com.hp.hpl.jena.util.iterator.*;
014:
015: import java.util.*;
016:
017: /** An N3 pretty printer.
018: * Tries to make N3 data look readable - works better on regular data.
019: *
020: * @author Andy Seaborne
021: * @version $Id: N3JenaWriterPP.java,v 1.23 2008/02/08 16:13:20 andy_seaborne Exp $
022: */
023:
024: public class N3JenaWriterPP extends N3JenaWriterCommon
025: /*implements RDFWriter*/
026: {
// This N3 writer proceeds in 2 stages. First, it analyses the model to be
// written to extract information that is going to be specially formatted
// (RDF lists, small anon nodes) and to calculate the prefixes that will be used.
030:
// Controls whether the simple objects of one subject/property pair are written
// together as a single separated object list (see writeObjectList).
// Read from the "objectLists" setting; the second argument is presumably the
// default value - confirm against getBooleanValue in the superclass.
final private boolean doObjectListsAsLists = getBooleanValue(
"objectLists", true);

// Data structures used in controlling the formatting.
// All except 'roots' are allocated in allocateDatastructures() and reset to
// null in clearDatastructures().

Set rdfLists = null; // Heads of well-formed RDF lists (filled by prepareLists)
Set rdfListsAll = null; // Every resource that is a cell of such a list (head or element)
Set rdfListsDone = null; // RDF lists written (added to by writeList)
Set roots = null; // Things to put at the top level. NOTE(review): never assigned or read in this class - confirm whether still needed
Set oneRefObjects = null; // Bnodes referred to once as an object - can inline
Set oneRefDone = null; // Things done - so we can check for missed items (added to by writeObject)

// Do we do nested (one reference) nodes?
// Temporarily switched off by finishWriting() while flushing left-over nodes.
boolean allowDeep = true;

// Separator printed between the members of an object list.
static final String objectListSep = " , ";
047:
048: // ----------------------------------------------------
// Preparation stage
050:
051: protected void prepare(Model model) {
052: prepareLists(model);
053: prepareOneRefBNodes(model);
054: }
055:
056: // Find well-formed RDF lists - does not find empty lists (this is intentional)
057: // Works by finding all tails, and work backwards to the head.
058: // RDF lists may, or may not, have a type element.
059: // Should do this during preparation, not as objects found during the write
060: // phase.
061:
062: private void prepareLists(Model model) {
063: Set this ListAll = new HashSet();
064:
065: StmtIterator listTailsIter = model.listStatements(null,
066: RDF.rest, RDF.nil);
067:
068: // For every tail of a list
069: //tailLoop:
070: for (; listTailsIter.hasNext();) {
071: // The resource for the current element being considered.
072: Resource listElement = listTailsIter.nextStatement()
073: .getSubject();
074: // The resource pointing to the link we have just looked at.
075: Resource validListHead = null;
076:
077: // Chase to head of list
078: for (;;) {
079: boolean isOK = checkListElement(listElement);
080: if (!isOK)
081: break;
082:
083: // At this point the element is exactly a DAML list element.
084: if (N3JenaWriter.DEBUG)
085: out.println("# RDF list all: "
086: + formatResource(listElement));
087: validListHead = listElement;
088: this ListAll.add(listElement);
089:
090: // Find the previous node.
091: StmtIterator sPrev = model.listStatements(null,
092: RDF.rest, listElement);
093:
094: if (!sPrev.hasNext())
095: // No daml:rest link
096: break;
097:
098: // Valid pretty-able list. Might be longer.
099: listElement = sPrev.nextStatement().getSubject();
100: if (sPrev.hasNext()) {
101: if (N3JenaWriter.DEBUG)
102: out.println("# RDF shared tail from "
103: + formatResource(listElement));
104: break;
105: }
106: }
107: // At head of a pretty-able list - add its elements and its head.
108: if (N3JenaWriter.DEBUG)
109: out.println("# DAML list head: "
110: + formatResource(validListHead));
111: rdfListsAll.addAll(this ListAll);
112: if (validListHead != null)
113: rdfLists.add(validListHead);
114: }
115: listTailsIter.close();
116: }
117:
118: // Validate one list element.
119: private boolean checkListElement(Resource listElement) {
120: if (!listElement.hasProperty(RDF.rest)
121: || !listElement.hasProperty(RDF.first)) {
122: if (N3JenaWriter.DEBUG)
123: out
124: .println("# RDF list element does not have required properties: "
125: + formatResource(listElement));
126: return false;
127: }
128:
129: // Must be exactly two properties (the ones we just tested for)
130: // or three including the RDF.type RDF.List statement.
131: int numProp = countProperties(listElement);
132:
133: if (numProp == 2)
134: // Must have exactly the properties we just tested for.
135: return true;
136:
137: if (numProp == 3) {
138: if (listElement.hasProperty(RDF.type, RDF.List))
139: return true;
140: if (N3JenaWriter.DEBUG)
141: out
142: .println("# RDF list element: 3 properties but no rdf:type rdf:List"
143: + formatResource(listElement));
144: return false;
145: }
146:
147: if (N3JenaWriter.DEBUG)
148: out
149: .println("# RDF list element does not right number of properties: "
150: + formatResource(listElement));
151: return false;
152: }
153:
154: // Find bnodes that are objects of only one statement (and hence can be inlined)
155: // which are not RDF lists.
156: // Could do this testing at write time (unlike lists)
157:
158: private void prepareOneRefBNodes(Model model) {
159:
160: NodeIterator objIter = model.listObjects();
161: for (; objIter.hasNext();) {
162: RDFNode n = objIter.nextNode();
163:
164: if (testOneRefBNode(n))
165: oneRefObjects.add(n);
166: objIter.close();
167:
168: // N3JenaWriter.DEBUG
169: if (N3JenaWriter.DEBUG) {
170: out.println("# RDF Lists = " + rdfLists.size());
171: out.println("# RDF ListsAll = " + rdfListsAll.size());
172: out.println("# oneRefObjects = "
173: + oneRefObjects.size());
174: }
175: }
176: }
177:
178: private boolean testOneRefBNode(RDFNode n) {
179: if (!(n instanceof Resource))
180: return false;
181:
182: Resource obj = (Resource) n;
183:
184: if (!obj.isAnon())
185: return false;
186:
187: // In a list - done as list, not as embedded bNode.
188: if (rdfListsAll.contains(obj))
189: // RDF list (head or element)
190: return false;
191:
192: StmtIterator pointsToIter = obj.getModel().listStatements(null,
193: null, obj);
194: if (!pointsToIter.hasNext())
195: // Corrupt graph!
196: throw new JenaException("N3: found object with no arcs!");
197:
198: Statement s = pointsToIter.nextStatement();
199:
200: if (pointsToIter.hasNext())
201: return false;
202:
203: if (N3JenaWriter.DEBUG)
204: out.println("# OneRef: " + formatResource(obj));
205: return true;
206: }
207:
208: // ----------------------------------------------------
209: // Output stage
210:
211: // Property order is:
212: // 1 - rdf:type (as "a")
213: // 2 - other rdf: rdfs: namespace items (sorted)
214: // 3 - all other properties, sorted by URI (not qname)
215:
216: protected ClosableIterator preparePropertiesForSubject(Resource r) {
217: Set seen = new HashSet();
218: boolean hasTypes = false;
219: SortedMap tmp1 = new TreeMap();
220: SortedMap tmp2 = new TreeMap();
221:
222: StmtIterator sIter = r.listProperties();
223: for (; sIter.hasNext();) {
224: Property p = sIter.nextStatement().getPredicate();
225: if (seen.contains(p))
226: continue;
227: seen.add(p);
228:
229: if (p.equals(RDF.type)) {
230: hasTypes = true;
231: continue;
232: }
233:
234: if (p.getURI().startsWith(RDF.getURI())
235: || p.getURI().startsWith(RDFS.getURI())) {
236: tmp1.put(p.getURI(), p);
237: continue;
238: }
239:
240: tmp2.put(p.getURI(), p);
241: }
242: sIter.close();
243:
244: ExtendedIterator eIter = null;
245:
246: if (hasTypes)
247: eIter = new SingletonIterator(RDF.type);
248:
249: ExtendedIterator eIter2 = WrappedIterator.create(tmp1.values()
250: .iterator());
251:
252: eIter = (eIter == null) ? eIter2 : eIter.andThen(eIter2);
253:
254: eIter2 = WrappedIterator.create(tmp2.values().iterator());
255:
256: eIter = (eIter == null) ? eIter2 : eIter.andThen(eIter2);
257: return eIter;
258: }
259:
260: protected boolean skipThisSubject(Resource subj) {
261: return rdfListsAll.contains(subj)
262: || oneRefObjects.contains(subj);
263: }
264:
265: // protected void writeModel(Model model)
266: // {
267: // super.writeModel(model) ;
268: //
269: //
270:
271: // Before ...
272:
273: protected void startWriting() {
274: allocateDatastructures();
275: }
276:
277: // Flush any unwritten objects.
278: // 1 - OneRef objects
// Normally there are no "one ref" objects left
280: // However loops of "one ref" are possible.
281: // 2 - Lists
282:
283: protected void finishWriting() {
284: oneRefObjects.removeAll(oneRefDone);
285:
286: for (Iterator leftOverIter = oneRefObjects.iterator(); leftOverIter
287: .hasNext();) {
288: out.println();
289: if (N3JenaWriter.DEBUG)
290: out.println("# One ref");
291: // Don't allow further one ref objects to be inlined.
292: allowDeep = false;
293: writeOneGraphNode((Resource) leftOverIter.next());
294: allowDeep = true;
295: }
296:
297: // Are there any unattached RDF lists?
298: // We missed these earlier (assumed all DAML lists are values of some statement)
299: for (Iterator leftOverIter = rdfLists.iterator(); leftOverIter
300: .hasNext();) {
301: Resource r = (Resource) leftOverIter.next();
302: if (rdfListsDone.contains(r))
303: continue;
304: out.println();
305: if (N3JenaWriter.DEBUG)
306: out.println("# RDF List");
307:
308: if (!r.isAnon() || countArcsTo(r) > 0) {
309: // Name it.
310: out.print(formatResource(r));
311: out.print(" :- ");
312: }
313: writeList(r);
314: out.println(" .");
315: }
316:
317: //out.println() ;
318: //writeModelSimple(model, bNodesMap, base) ;
319: out.flush();
320: clearDatastructures();
321: }
322:
323: // Need to decide between one line or many.
324: // Very hard to do a pretty thing here because the objects may be large or small or a mix.
325:
326: protected void writeObjectList(Resource subject, Property property) {
327: // if ( ! doObjectListsAsLists )
328: // {
329: // super.writeObjectList(resource, property) ;
330: // return ;
331: // }
332:
333: String propStr = formatProperty(property);
334:
335: // Find which objects are simple (i.e. not nested structures)
336:
337: StmtIterator sIter = subject.listProperties(property);
338: Set simple = new HashSet();
339: Set complex = new HashSet();
340:
341: for (; sIter.hasNext();) {
342: Statement stmt = sIter.nextStatement();
343: RDFNode obj = stmt.getObject();
344: if (isSimpleObject(obj))
345: simple.add(obj);
346: else
347: complex.add(obj);
348: }
349: sIter.close();
350: // DEBUG
351: int simpleSize = simple.size();
352: int complexSize = complex.size();
353:
354: // Write property/simple objects
355:
356: if (simple.size() > 0) {
357: String padSp = null;
358: // Simple objects - allow property to be long and alignment to be lost
359: if ((propStr.length() + minGap) <= widePropertyLen)
360: padSp = pad(calcPropertyPadding(propStr));
361:
362: if (doObjectListsAsLists) {
363: // Write all simple objects as one list.
364: out.print(propStr);
365: out.incIndent(indentObject);
366:
367: if (padSp != null)
368: out.print(padSp);
369: else
370: out.println();
371:
372: for (Iterator iter = simple.iterator(); iter.hasNext();) {
373: RDFNode n = (RDFNode) iter.next();
374: writeObject(n);
375:
376: // As an object list
377: if (iter.hasNext())
378: out.print(objectListSep);
379: }
380:
381: out.decIndent(indentObject);
382: } else {
383: for (Iterator iter = simple.iterator(); iter.hasNext();) {
384: // This is also the same as the complex case
385: // except the width the property can go in is different.
386: out.print(propStr);
387: out.incIndent(indentObject);
388: if (padSp != null)
389: out.print(padSp);
390: else
391: out.println();
392:
393: RDFNode n = (RDFNode) iter.next();
394: writeObject(n);
395: out.decIndent(indentObject);
396:
397: // As an object list
398: if (iter.hasNext())
399: out.println(" ;");
400: }
401:
402: }
403: }
404: // Now do complex objects.
405: // Write property each time for a complex object.
406: // Do not allow over long properties but same line objects.
407:
408: if (complex.size() > 0) {
409: // Finish the simple list if there was one
410: if (simple.size() > 0)
411: out.println(" ;");
412:
413: int padding = -1;
414: String padSp = null;
415:
416: // Can we fit teh start of teh complex object on this line?
417:
418: // DEBUG variable.
419: int tmp = propStr.length();
420: // Complex objects - do not allow property to be long and alignment to be lost
421: if ((propStr.length() + minGap) <= propertyCol) {
422: padding = calcPropertyPadding(propStr);
423: padSp = pad(padding);
424: }
425:
426: for (Iterator iter = complex.iterator(); iter.hasNext();) {
427: int this Indent = indentObject;
428: //if ( i )
429: out.incIndent(this Indent);
430: out.print(propStr);
431: if (padSp != null)
432: out.print(padSp);
433: else
434: out.println();
435:
436: RDFNode n = (RDFNode) iter.next();
437: writeObject(n);
438: out.decIndent(this Indent);
439: if (iter.hasNext())
440: out.println(" ;");
441: }
442: }
443: return;
444: }
445:
446: private boolean isSimpleObject(RDFNode node) {
447: if (node instanceof Literal)
448: return true;
449: Resource rObj = (Resource) node;
450: if (allowDeep && oneRefObjects.contains(rObj))
451: return false;
452: return true;
453: }
454:
455: protected void writeObject(RDFNode node) {
456: if (node instanceof Literal) {
457: writeLiteral((Literal) node);
458: return;
459: }
460:
461: Resource rObj = (Resource) node;
462: if (allowDeep && !isSimpleObject(rObj)) {
463: oneRefDone.add(rObj);
464: ClosableIterator iter = preparePropertiesForSubject(rObj);
465: if (!iter.hasNext()) {
466: // No properties.
467: out.print("[]");
468: } else {
469: out.print("[ ");
470: out.incIndent(2);
471: writePropertiesForSubject(rObj, iter);
472: out.decIndent(2);
473: out.println();
474: // Line up []
475: out.print("]");
476: }
477: iter.close();
478: return;
479: }
480:
481: if (rdfLists.contains(rObj))
482: if (countArcsTo(rObj) <= 1) {
483: writeList(rObj);
484: return;
485: }
486:
487: out.print(formatResource(rObj));
488: }
489:
490: // Need to out.print in short (all on one line) and long forms (multiple lines)
491: // That needs starts point depth tracking.
492: private void writeList(Resource resource)
493:
494: {
495: out.print("(");
496: out.incIndent(2);
497: boolean listFirst = true;
498: for (Iterator iter = rdfListIterator(resource); iter.hasNext();) {
499: if (!listFirst)
500: out.print(" ");
501: listFirst = false;
502: RDFNode n = (RDFNode) iter.next();
503: writeObject(n);
504: }
505: out.print(")");
506: out.decIndent(2);
507: rdfListsDone.add(resource);
508:
509: }
510:
511: // Called before each writing run.
512: protected void allocateDatastructures() {
513: rdfLists = new HashSet();
514: rdfListsAll = new HashSet();
515: rdfListsDone = new HashSet();
516: oneRefObjects = new HashSet();
517: oneRefDone = new HashSet();
518: }
519:
520: // Especially release large intermediate memory objects
521: protected void clearDatastructures() {
522: rdfLists = null;
523: rdfListsAll = null;
524: rdfListsDone = null;
525: oneRefObjects = null;
526: oneRefDone = null;
527: }
528: }
529:
530: /*
531: * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
532: * All rights reserved.
533: *
534: * Redistribution and use in source and binary forms, with or without
535: * modification, are permitted provided that the following conditions
536: * are met:
537: * 1. Redistributions of source code must retain the above copyright
538: * notice, this list of conditions and the following disclaimer.
539: * 2. Redistributions in binary form must reproduce the above copyright
540: * notice, this list of conditions and the following disclaimer in the
541: * documentation and/or other materials provided with the distribution.
542: * 3. The name of the author may not be used to endorse or promote products
543: * derived from this software without specific prior written permission.
544: *
545: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
546: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
547: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
548: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
549: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
550: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
551: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
552: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
553: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
554: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
555: */
|