001: /*****************************************************************************
002: * Source code information
003: * -----------------------
004: * Original author Ian Dickinson, HP Labs Bristol
005: * Author email ian.dickinson@hp.com
006: * Package Jena2
007: * Web site http://jena.sourceforge.net
008: * Created 16-Sep-2005
009: * Filename $RCSfile: rdfcat.java,v $
010: * Revision $Revision: 1.15 $
011: * Release status $State: Exp $
012: *
013: * Last modified on $Date: 2008/01/02 12:08:16 $
014: * by $Author: andy_seaborne $
015: *
016: * (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
017: * [See end of file]
018: *****************************************************************************/package jena;
019:
020: // Imports
021: ///////////////
022:
023: import java.io.OutputStream;
024: import java.util.*;
025:
026: import com.hp.hpl.jena.rdf.model.*;
027: import com.hp.hpl.jena.rdf.model.impl.RDFWriterFImpl;
028: import com.hp.hpl.jena.shared.NoWriterForLangException;
029: import com.hp.hpl.jena.util.FileManager;
030: import com.hp.hpl.jena.util.FileUtils;
031: import com.hp.hpl.jena.vocabulary.*;
032:
033: import jena.cmdline.*;
034:
035: /**
036: * <p>
037: * An RDF utility that takes its name from the Unix utility <em>cat</em>, and
038: * is used to generate serialisations of the contents of zero or more
039: * input model serialisations. <strong>Note</strong> In a change from previous
040: * versions, but to ensure compatibility with standard argument handling
041: * practice, the input language options are <em>no longer sticky</em>. In
042: * previous versions, <code>rdfcat -n A B C</code> would ensure that A, B
043: * and C were all read as N3. From Jena 2.5.2 onwards, this requires:
044: * <code>rdfcat -n A -n B -n C</code>, or the use of the <code>-in</code>
045: * option.
046: * </p>
047: * <p>Synopsis:</p>
048: * <pre>
049: * java jena.rdfcat (options|input)*
050: * where options are:
051: * -out N3 (aliases n, n3, ttl)
052: * -out N-TRIPLE (aliases t, ntriple)
053: * -out RDF/XML (aliases x, rdf, xml, rdfxml)
054: * -out RDF/XML-ABBREV (default)
055: * -in N3 (aliases n, n3, ttl)
056: * -in N-TRIPLE (aliases t, ntriple)
057: * -in RDF/XML (aliases x, rdf, xml, rdfxml)
058: * -include
059: * -noinclude (default)
060: *
061: * input is one of:
062: * -n <filename> for n3 input (aliases -n3, -N3, -ttl)
063: * -x <filename> for rdf/xml input (aliases -rdf, -xml, -rdfxml)
064: * -t <filename> for n-triple input (aliases -ntriple)
065: * or just a URL, a filename, or - for the standard input.
066: * </pre>
067: * <p>
068: * The default
069: * input language is RDF/XML, but the reader will try to guess the
070: * input language based on the file extension (e.g. N3 for file with a .n3
071: * file extension).
072: * </p>
073: * <p>The input language options set the language for the following file
074: * name only. So in the following example, input
075: * A is read as N3, inputs B, C and D are read as RDF/XML,
076: * while stdin is read as N-TRIPLE:</p>
077: * <pre>
078: * java jena.rdfcat -n A B C -t - -x D
079: * </pre>
080: * <p>To change the default input language for all files that do
081: * not have a specified language encoding, use the <code>-in</code> option.
082: * </p>
083: * <p>If the <code>include</code> option is set, the input files are scanned
084: * for <code>rdfs:seeAlso</code> and <code>owl:imports</code> statements, and
085: * the objects of these statements are read as well. By default, <code>include</code>
086: * is off. If <code>include</code> is turned on, the normal behaviour is for
087: * the including statements (e.g. <code>owl:imports</code>) to be filtered
088: * from the output models. To leave such statements in place, use the <code>--nofilter</code>
089: * option.</p>
090: * <p>rdfcat uses the Jena {@link com.hp.hpl.jena.util.FileManager FileManager}
091: * to resolve input URI's to locations. This allows, for example, <code>http:</code>
092: * URI's to be re-directed to local <code>file:</code> locations, to avoid a
093: * network transaction.</p>
094: * <p>Examples:</p>
095: * <pre>
096: * Join two RDF/XML files together into a single model in RDF/XML-ABBREV:
097: * java jena.rdfcat in1 in2 > out.rdf
098: *
099: * Convert a single RDF/XML file to N3:
100: * java jena.rdfcat in1 -out N3 > out.n3
101: *
102: * Join two owl files one N3, one XML, and their imports, into a single NTRIPLE file:
103: * java jena.rdfcat -out NTRIPLE -include in1.owl -n in2.owl > out.ntriple
104: *
105: * Concatenate two N3-serving http URL's as N-TRIPLE
106: * java jena.rdfcat -in N3 -out N-TRIPLE http://example.com/a http://example.com/b
107: * </pre>
108: * <p>Note that, in a difference from the Unix utility <code>cat</code>, the order
109: * of input statements is not preserved. The output document is a merge of the
110: * input documents, and does not preserve any statement ordering from the input
111: * serialisations. Also, duplicate triples will be suppressed.</p>
112: *
113: * @author Ian Dickinson, HP Labs (<a href="mailto:Ian.Dickinson@hp.com">email</a>)
114: * @version Release @release@ ($Id: rdfcat.java,v 1.15 2008/01/02 12:08:16 andy_seaborne Exp $)
115: */
116: public class rdfcat {
117: // Constants
118: //////////////////////////////////
119:
120: /** Argument setting expected input language to N3 */
121: public final ArgDecl IN_N3 = new ArgDecl(true, "n", "n3", "ttl",
122: "N3", new ArgHandler() {
123: public void action(String arg, String val)
124: throws IllegalArgumentException {
125: m_actionQ.add(new ReadAction(val, "N3"));
126: }
127: });
128:
129: /** Argument setting expected input language to RDF/XML */
130: public final ArgDecl IN_RDF_XML = new ArgDecl(true, "x", "xml",
131: "rdfxml", "rdf", new ArgHandler() {
132: public void action(String arg, String val)
133: throws IllegalArgumentException {
134: m_actionQ.add(new ReadAction(val, "RDF/XML"));
135: }
136: });
137:
138: /** Argument setting expected input language to NTRIPLE */
139: public final ArgDecl IN_NTRIPLE = new ArgDecl(true, "t",
140: "ntriples", "ntriple", "n-triple", "n-triples",
141: new ArgHandler() {
142: public void action(String arg, String val)
143: throws IllegalArgumentException {
144: m_actionQ.add(new ReadAction(val, "N-TRIPLE"));
145: }
146: });
147:
148: /** Argument to set the output language */
149: public final ArgDecl OUT_LANG = new ArgDecl(true, "out",
150: new ArgHandler() {
151: public void action(String arg, String val)
152: throws IllegalArgumentException {
153: setOutput(val);
154: }
155: });
156:
157: /** Argument to set the default input language */
158: public final ArgDecl IN_LANG = new ArgDecl(true, "in",
159: new ArgHandler() {
160: public void action(String arg, String val)
161: throws IllegalArgumentException {
162: expectInput(val);
163: }
164: });
165:
166: /** Argument to turn include processing on */
167: public final ArgDecl INCLUDE = new ArgDecl(false, "include",
168: new ArgHandler() {
169: public void action(String arg, String val)
170: throws IllegalArgumentException {
171: setInclude(true);
172: }
173: });
174:
175: /** Argument to turn include processing off */
176: public final ArgDecl NOINCLUDE = new ArgDecl(false, "noinclude",
177: new ArgHandler() {
178: public void action(String arg, String val)
179: throws IllegalArgumentException {
180: setInclude(false);
181: }
182: });
183:
184: /** Argument to leave import/seeAlso statements in place in flattened models */
185: public final ArgDecl NOFILTER = new ArgDecl(false, "nofilter",
186: new ArgHandler() {
187: public void action(String arg, String val)
188: throws IllegalArgumentException {
189: setRemoveIncludeStatements(false);
190: }
191: });
192:
193: /** Argument to show usage */
194: public final ArgDecl HELP = new ArgDecl(false, "help",
195: new ArgHandler() {
196: public void action(String arg, String val)
197: throws IllegalArgumentException {
198: usage();
199: }
200: });
201: public final ArgDecl USAGE = new ArgDecl(false, "usage",
202: new ArgHandler() {
203: public void action(String arg, String val)
204: throws IllegalArgumentException {
205: usage();
206: }
207: });
208:
209: // Static variables
210: //////////////////////////////////
211:
212: // Instance variables
213: //////////////////////////////////
214:
215: /** The command line processor that handles the arguments */
216: protected CommandLine m_cmdLine = new RCCommandLine().add(IN_N3)
217: .add(IN_NTRIPLE).add(IN_RDF_XML).add(OUT_LANG).add(IN_LANG)
218: .add(INCLUDE).add(NOINCLUDE).add(NOFILTER).add(HELP).add(
219: USAGE);
220:
221: /** The merged model containing all of the inputs */
222: protected Model m_model = ModelFactory.createDefaultModel();
223:
224: /** The output format to write to, defaults to RDF/XML-ABBREV */
225: protected String m_outputFormat = "RDF/XML-ABBREV";
226:
227: /** The input format we're expecting for the next URL to be read - defaults to RDF/XML */
228: protected String m_inputFormat = "RDF/XML";
229:
230: /** Flag to indicate whether we include owl:imports and rdfs:seeAlso */
231: protected boolean m_include = false;
232:
233: /** List of URL's that have been loaded already, occurs check */
234: protected Set m_seen = new HashSet();
235:
236: /** Flag to control whether import/include statements are filtered from merged models */
237: protected boolean m_removeIncludeStatements = true;
238:
239: /** Action queue */
240: protected List m_actionQ = new ArrayList();
241:
242: // Constructors
243: //////////////////////////////////
244:
245: // External signature methods
246: //////////////////////////////////
247:
248: public static void main(String[] args) {
249: new rdfcat().go(args);
250: }
251:
252: // Internal implementation methods
253: //////////////////////////////////
254:
255: /* main loop */
256: protected void go(String[] args) {
257: m_cmdLine.process(args);
258:
259: // process any stored items
260: for (int i = 0; i < m_cmdLine.numItems(); i++) {
261: m_actionQ.add(new ReadAction(m_cmdLine.getItem(i),
262: getExpectedInput()));
263: }
264: for (Iterator j = m_actionQ.iterator(); j.hasNext();) {
265: ((RCAction) j.next()).run(this );
266: }
267:
268: // generate the output
269: m_model.write(getOutputStream(), m_outputFormat);
270: }
271:
272: /** Set the input language of next and subsequent reads */
273: protected void expectInput(String lang) {
274: m_inputFormat = lang;
275: }
276:
277: /** Answer the currently expected input format */
278: protected String getExpectedInput() {
279: return m_inputFormat;
280: }
281:
282: /** Set the language to write the output model in */
283: protected void setOutput(String lang) {
284: m_outputFormat = getCheckedLanguage(lang);
285: }
286:
287: /**
288: Answer the full, checked, language name expanded from <code>shortName</code>.
289: The shortName is expanded according to the table of abbreviations [below].
290: It is then checked against RDFWriterFImpl's writer table [this is hacky but
291: at the moment it's the most available interface] and the NoWriter exception
292: trapped and replaced by the original IllegalArgument exception.
293: */
294: public static String getCheckedLanguage(String shortLang) {
295: String fullLang = (String) unabbreviate.get(shortLang);
296: String tryLang = (fullLang == null ? shortLang : fullLang);
297: try {
298: new RDFWriterFImpl().getWriter(tryLang);
299: } catch (NoWriterForLangException e) {
300: throw new IllegalArgumentException("'" + shortLang
301: + "' is not recognised as a legal output format");
302: }
303: return tryLang;
304: }
305:
306: /**
307: Map from abbreviated names to full names.
308: */
309: public static Map unabbreviate = makeUnabbreviateMap();
310:
311: /**
312: Construct the canonical abbreviation map.
313: */
314: protected static Map makeUnabbreviateMap() {
315: Map result = new HashMap();
316: result.put("x", "RDF/XML");
317: result.put("rdf", "RDF/XML");
318: result.put("rdfxml", "RDF/XML");
319: result.put("xml", "RDF/XML");
320: result.put("n3", "N3");
321: result.put("n", "N3");
322: result.put("ttl", "N3");
323: result.put("ntriples", "N-TRIPLE");
324: result.put("ntriple", "N-TRIPLE");
325: result.put("t", "N-TRIPLE");
326: result.put("owl", "RDF/XML-ABBREV");
327: result.put("abbrev", "RDF/XML-ABBREV");
328: return result;
329: }
330:
331: /** Set the flag to include owl:imports and rdf:seeAlso files in the output, default off */
332: protected void setInclude(boolean incl) {
333: m_include = incl;
334: }
335:
336: /** Set the flag to leave owl:imports and rdfs:seeAlso statements in place, rather than filter them */
337: protected void setRemoveIncludeStatements(boolean f) {
338: m_removeIncludeStatements = f;
339: }
340:
341: /* Take the string as an input file or URI, and
342: * try to read using the current default input syntax.
343: */
344: protected void readInput(String inputName) {
345: List queue = new ArrayList();
346: queue.add(new IncludeQueueEntry(inputName, null));
347:
348: while (!queue.isEmpty()) {
349: IncludeQueueEntry entry = (IncludeQueueEntry) queue
350: .remove(0);
351: String in = entry.m_includeURI;
352:
353: if (!m_seen.contains(in)) {
354: m_seen.add(in);
355: Model inModel = ModelFactory.createDefaultModel();
356:
357: // check for stdin
358: if (in.equals("-")) {
359: inModel.read(System.in, null, m_inputFormat);
360: } else {
361: // lang from extension overrides default set on command line
362: String lang = FileUtils
363: .guessLang(in, m_inputFormat);
364: FileManager.get().readModel(inModel, in, lang);
365: }
366:
367: // check for anything more that we need to read
368: if (m_include) {
369: addIncludes(inModel, queue);
370: }
371:
372: // merge the models
373: m_model.add(inModel);
374: m_model.setNsPrefixes(inModel);
375:
376: // do we remove the include statement?
377: if (m_removeIncludeStatements
378: && entry.m_includeStmt != null) {
379: m_model.remove(entry.m_includeStmt);
380: }
381: }
382: }
383: }
384:
385: /** Return the stream to which the output is written, defaults to stdout */
386: protected OutputStream getOutputStream() {
387: return System.out;
388: }
389:
390: /** Add any additional models to include given the rdfs:seeAlso and
391: * owl:imports statements in the given model
392: */
393: protected void addIncludes(Model inModel, List queue) {
394: // first collect any rdfs:seeAlso statements
395: StmtIterator i = inModel.listStatements(null, RDFS.seeAlso,
396: (RDFNode) null);
397: while (i.hasNext()) {
398: Statement s = i.nextStatement();
399: queue.add(new IncludeQueueEntry(getURL(s.getObject()), s));
400: }
401:
402: // then any owl:imports
403: i = inModel.listStatements(null, OWL.imports, (RDFNode) null);
404: while (i.hasNext()) {
405: Statement s = i.nextStatement();
406: queue
407: .add(new IncludeQueueEntry(getURL(s.getResource()),
408: s));
409: }
410: }
411:
412: protected void usage() {
413: System.err.println("Usage: java jena.rdfcat (option|input)*");
414: System.err
415: .println("Concatenates the contents of zero or more input RDF documents.");
416: System.err
417: .println("Options: -out N3 | N-TRIPLE | RDF/XML | RDF/XML-ABBREV");
418: System.err
419: .println(" -n expect subsequent inputs in N3 syntax");
420: System.err
421: .println(" -x expect subsequent inputs in RDF/XML syntax");
422: System.err
423: .println(" -t expect subsequent inputs in N-TRIPLE syntax");
424: System.err
425: .println(" -[no]include include rdfs:seeAlso and owl:imports");
426: System.err
427: .println("input can be filename, URL, or - for stdin");
428: System.err
429: .println("Recognised aliases for -n are: -n3 -ttl or -N3");
430: System.err
431: .println("Recognised aliases for -x are: -xml -rdf or -rdfxml");
432: System.err.println("Recognised aliases for -t are: -ntriple");
433: System.err
434: .println("Output format aliases: x, xml or rdf for RDF/XML, n, n3 or ttl for N3, t or ntriple for N-TRIPLE");
435: System.err
436: .println("See the Javadoc for jena.rdfcat for additional details.");
437:
438: System.exit(0);
439: }
440:
441: /** Answer a URL string from a resource or literal */
442: protected String getURL(RDFNode n) {
443: return n.isLiteral() ? ((Literal) n).getLexicalForm()
444: : ((Resource) n).getURI();
445: }
446:
447: //==============================================================================
448: // Inner class definitions
449: //==============================================================================
450:
451: /** Local extension to CommandLine to handle mixed arguments and values */
452: protected class RCCommandLine extends CommandLine {
453: /** Don't stop processing args on the first non-arg */
454: public boolean xendProcessing(String argStr) {
455: return false;
456: }
457:
458: /** Handle an unrecognised argument by assuming it's a URI to read */
459: public void handleUnrecognizedArg(String argStr) {
460: if (argStr.equals("-") || !argStr.startsWith("-")) {
461: // queue this action for reading later
462: m_actionQ
463: .add(new ReadAction(argStr, getExpectedInput()));
464: } else {
465: System.err.println("Unrecognised argument: " + argStr);
466: usage();
467: }
468: }
469:
470: /** Hook to test whether this argument should be processed further
471: */
472: public boolean ignoreArgument(String argStr) {
473: return !argStr.startsWith("-") || argStr.length() == 1;
474: }
475:
476: /** Answer an iterator over the non-arg items from the command line */
477: public Iterator getItems() {
478: return items.iterator();
479: }
480: }
481:
482: /** Queue entry that contains both a URI to be included, and a statement that may be removed */
483: protected class IncludeQueueEntry {
484: protected String m_includeURI;
485: protected Statement m_includeStmt;
486:
487: protected IncludeQueueEntry(String includeURI,
488: Statement includeStmt) {
489: m_includeURI = includeURI;
490: m_includeStmt = includeStmt;
491: }
492: }
493:
494: /** Simple action object for local processing queue */
495: protected interface RCAction {
496: public void run(rdfcat rc);
497: }
498:
499: /** Action to set the output format */
500: protected class ReadAction implements RCAction {
501: private String m_lang;
502: private String m_uri;
503:
504: protected ReadAction(String uri, String lang) {
505: m_lang = lang;
506: m_uri = uri;
507: }
508:
509: /** perform the action of reading a uri */
510: public void run(rdfcat rc) {
511: String l = rc.getExpectedInput();
512: if (m_lang != null) {
513: // if an input lang was given, use that
514: rc.expectInput(m_lang);
515: }
516: rc.readInput(m_uri);
517:
518: // put the lang back to default
519: rc.expectInput(l);
520: }
521: }
522: }
523:
524: /*
525: * (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
526: * All rights reserved.
527: *
528: * Redistribution and use in source and binary forms, with or without
529: * modification, are permitted provided that the following conditions
530: * are met:
531: * 1. Redistributions of source code must retain the above copyright
532: * notice, this list of conditions and the following disclaimer.
533: * 2. Redistributions in binary form must reproduce the above copyright
534: * notice, this list of conditions and the following disclaimer in the
535: * documentation and/or other materials provided with the distribution.
536: * 3. The name of the author may not be used to endorse or promote products
537: * derived from this software without specific prior written permission.
538: *
539: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
540: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
541: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
542: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
543: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
544: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
545: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
546: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
547: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
548: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
549: */
|