001: /*
002: * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions
007: * are met:
008: * 1. Redistributions of source code must retain the above copyright
009: * notice, this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright
011: * notice, this list of conditions and the following disclaimer in the
012: * documentation and/or other materials provided with the distribution.
013: * 3. The name of the author may not be used to endorse or promote products
014: * derived from this software without specific prior written permission.
015:
016: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
017: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
018: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
019: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
020: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
021: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
022: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
023: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
024: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
025: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
026:
027: * * $Id: NTriple.java,v 1.25 2008/01/02 12:06:46 andy_seaborne Exp $
028:
029: AUTHOR: Jeremy J. Carroll
030: */
031: /*
032: * XML2NTriple.java
033: *
034: * Created on July 13, 2001, 10:06 PM
035: */
036:
037: package com.hp.hpl.jena.rdf.arp;
038:
039: import java.io.File;
040: import java.io.FileInputStream;
041: import java.io.IOException;
042: import java.io.InputStream;
043: import java.io.PrintStream;
044: import java.net.URL;
045:
046: import org.xml.sax.ErrorHandler;
047: import org.xml.sax.Locator;
048: import org.xml.sax.SAXException;
049: import org.xml.sax.SAXParseException;
050:
051: /** A command line interface into ARP.
 * Creates N-Triples or just error messages.
053: * <pre>
 * java &lt;class-path&gt; com.hp.hpl.jena.rdf.arp.NTriple ( [ -[xstfurR]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )...
055: * </pre>
056: * <p>
 * All options, files and URLs can be intermingled in any order.
058: * They are processed from left-to-right.
 * <dl>
 * <dt>
 * file </dt><dd> Converts RDF/XML file into N-triples
061: * </dd><dt>
062: * url </dt><dd> Converts RDF/XML from URL into N-triples
063: * </dd><dt>
064: * -b uri </dt><dd> Sets XML Base to the absolute URI.
065: * </dd><dt>
066: * -r </dt><dd> Content is RDF (default, no embedding, rdf:RDF tag may be omitted).
067: * </dd><dt>
068: * -R </dt><dd> RDF embedded in XML document, search for obligatory rdf:RDF start element.
069: * </dd><dt>
070: * -t </dt><dd> No n-triple output, error checking only.
071: * </dd><dt>
072: * -x </dt><dd> Lax mode - warnings are suppressed.
073: * </dd><dt>
074: * -s </dt><dd> Strict mode - most warnings are errors.
075: * </dd><dt>
076: * -u </dt><dd> Allow unqualified attributes (defaults to warning).
077: * </dd><dt>
078: * -f </dt><dd> All errors are fatal - report first one only.
079: * </dd><dt>
080: * -n </dt><dd> Show line numbers of each triple.
081: * </dd><dt>
082: * -b url </dt><dd> Sets XML Base to the absolute url.
083: * </dd><dt>
084: * -e NNN[,NNN...]</dt><dd>
 * Treats numbered warning conditions as errors.
086: * </dd><dt>
087: * -w NNN[,NNN...]</dt><dd>
088: * Treats numbered error conditions as warnings.
089: * </dd><dt>
090: * -i NNN[,NNN...]
091: * </dt><dd>
 * Ignores numbered error/warning conditions.
 * </dd>
 * </dl>
094: * @author jjc
095: */
096: public class NTriple implements ARPErrorNumbers {
097:
/** Shared buffer in which the current output line is assembled before it is printed. */
private static StringBuffer line = new StringBuffer();
/** The parser in use; mainEh assigns a new ARP at the start of every call. */
private static ARP arp;
/** XML base set by the -b option; while null, each input supplies its own base. */
private static String xmlBase = null;
/** Set by the -n option: a locator comment is written ahead of each triple. */
private static boolean numbers = false;
102:
103: /** Starts an RDF/XML to NTriple converter.
104: * @param args The command-line arguments.
105: */
106: static public void main(String args[]) {
107: mainEh(args, null, null);
108: }
109:
/** Optional extra statement handler: mainEh stores its ap argument here and getSH chains it in when non-null. */
static StatementHandler andMeToo = null;
111:
112: /** Starts an RDF/XML to NTriple converter,
113: * using an error handler, and an ARPHandler.
114: * Statements get processed both by this class,
115: * and by the passed in StatementHandler
116: * @param args The command-line arguments.
117: * @param eh Can be null.
118: * @param ap Can be null.
119: */
120: static public void mainEh(String args[], ErrorHandler eh,
121: ARPEventHandler ap) {
122: boolean doneOne = false;
123: startMem = -1;
124: andMeToo = ap;
125: //SH sh = new SH();
126: int i;
127: arp = new ARP();
128: ARPHandlers handlers = arp.getHandlers();
129: handlers.setStatementHandler(getSH(true));
130: // arp.getOptions().setEmbedding(true);
131: if (ap != null) {
132: handlers.setNamespaceHandler(ap);
133: handlers.setExtendedHandler(ap);
134: }
135: if (eh != null)
136: handlers.setErrorHandler(eh);
137:
138: for (i = 0; i < args.length - 1; i++) {
139: if (args[i].startsWith("-")) {
140: i += processOpts(args[i].substring(1), args[i + 1]);
141: } else {
142: doneOne = true;
143: process(args[i]);
144: }
145: }
146: if (args.length > 0) {
147: if (args[i].startsWith("-")) {
148: if (doneOne
149: || processOpts(args[i].substring(1), "100") == 1)
150: usage();
151: } else {
152: doneOne = true;
153: process(args[i]);
154: }
155: }
156: if (!doneOne) {
157: process(System.in, "http://example.org/stdin",
158: "standard input");
159: }
160: if (startMem != -1) {
161: rt.gc();
162: System.out.println(rt.totalMemory() - rt.freeMemory()
163: - startMem);
164: rt.gc();
165: System.out.println(rt.totalMemory() - rt.freeMemory()
166: - startMem);
167: rt.gc();
168: System.out.println(rt.totalMemory() - rt.freeMemory()
169: - startMem);
170: rt.gc();
171: System.out.println(rt.totalMemory() - rt.freeMemory()
172: - startMem);
173: }
174: }
175:
176: /**
177: * @param b false for quiet.
178: */
179: private static StatementHandler getSH(boolean b) {
180: StatementHandler rslt = b ? (StatementHandler) new SH(
181: System.out) : new NoSH();
182: if (andMeToo != null)
183: rslt = new TwoSH(rslt, andMeToo);
184: return rslt;
185: }
186:
187: static private void lineNumber() {
188: if (numbers) {
189: Locator locator = arp.getLocator();
190: if (locator != null)
191: print("# " + locator.getSystemId() + ":"
192: + locator.getLineNumber() + "("
193: + locator.getColumnNumber() + ")\n");
194: }
195: }
196:
197: /*
198: * Options:
199: * -x Lax, Warnings suppressed
200: * -s Strict, Warnings are errors
201: * -f All errors are fatal.
202: * -u Suppress unqualified attribute warnings
203: * -t Error checking only, no n-triple output
204: * -b: set xml:base (same for all files?)
205: * -e: convert numbered warnings to errors
206: * -i: suppress numbered warnings
207: * -w: convert numbered errors/suppressed warnings to warnings
208: * -n: give line numbers
209: *
210: */
211: static void usage() {
212: System.err
213: .println("java <class-path> "
214: + NTriple.class.getName()
215: + " ( [ -[xstfurR]][ -b xmlBase -[eiw] NNN[,NNN...] ] [ file ] [ url ] )... ");
216: System.err
217: .println(" All options, files and URLs can be intemingled in any order.");
218: System.err
219: .println(" They are processed from left-to-right.");
220: System.err
221: .println(" file Converts RDF/XML file into N-triples");
222: System.err
223: .println(" url Converts RDF/XML from URL into N-triples");
224: System.err
225: .println(" -b uri Sets XML Base to the absolute URI.");
226: System.err
227: .println(" -r Content is RDF (default, no embedding, rdf:RDF tag may be omitted).");
228: System.err
229: .println(" -R RDF embedded in XML document, search for obligatory rdf:RDF start element.");
230: System.err
231: .println(" -t No n-triple output, error checking only.");
232: System.err
233: .println(" -x Lax mode - warnings are suppressed.");
234: System.err
235: .println(" -s Strict mode - most warnings are errors.");
236: System.err
237: .println(" -n Show line and column numbers.");
238: System.err
239: .println(" -u Allow unqualified attributes (defaults to warning).");
240: System.err
241: .println(" -f All errors are fatal - report first one only.");
242: System.err
243: .println(" -b url Sets XML Base to the absolute url.");
244: System.err.println(" -e NNN[,NNN...]");
245: System.err
246: .println(" Treats numbered warning conditions as errrors.");
247: System.err.println(" -w NNN[,NNN...]");
248: System.err
249: .println(" Treats numbered error conditions as warnings.");
250: System.err.println(" -i NNN[,NNN...]");
251: System.err
252: .println(" Ignores numbered error/warning conditions.");
253: System.exit(1);
254: }
255:
/** Runtime handle used for the gc and heap-size calls of the -D memory diagnostics. */
static final private Runtime rt = Runtime.getRuntime();
/**
 * Heap-in-use baseline recorded by the -D option; -1 means memory reporting is off.
 * NOTE(review): held as an int, so the (int) casts that fill it truncate
 * once the heap in use exceeds Integer.MAX_VALUE bytes.
 */
static private int startMem = -1;
258:
259: static private int processOpts(String opts, String nextArg) {
260: boolean usedNext = false;
261: ARPOptions options = arp.getOptions();
262: for (int i = 0; i < opts.length(); i++) {
263: char opt = opts.charAt(i);
264: if ("beiwD".indexOf(opt) != -1) {
265: if (usedNext)
266: usage();
267: usedNext = true;
268: }
269: switch (opt) {
270: case 'D':
271: final int nStatements = Integer.parseInt(nextArg);
272: rt.gc();
273: rt.gc();
274: startMem = (int) (rt.totalMemory() - rt.freeMemory());
275: arp.getHandlers().setStatementHandler(
276: new StatementHandler() {
277: int debugC = 0;
278:
279: public void statement(AResource subj,
280: AResource pred, AResource obj) {
281: statement(null, null, (ALiteral) null);
282:
283: }
284:
285: public void statement(AResource subj,
286: AResource pred, ALiteral lit) {
287: if (++debugC % 100 == 0) {
288: System.out.println("T: " + debugC);
289: rt.gc();
290: System.out
291: .println("M1: "
292: + (rt.totalMemory()
293: - rt
294: .freeMemory() - startMem));
295: rt.gc();
296: System.out
297: .println("M2: "
298: + (rt.totalMemory()
299: - rt
300: .freeMemory() - startMem));
301: }
302: if (debugC == 1) {
303: rt.gc();
304: rt.gc();
305: startMem = (int) (rt.totalMemory() - rt
306: .freeMemory());
307: }
308: if (debugC == nStatements) {
309:
310: rt.gc();
311: System.err.println("Kill me now.");
312: try {
313: Thread.sleep(200000);
314: } catch (Exception e) {
315: // ignore
316: }
317:
318: }
319:
320: }
321: });
322: break;
323: case 'x':
324: options.setLaxErrorMode();
325: break;
326: case 's':
327: options.setStrictErrorMode();
328: break;
329: case 't':
330: arp.getHandlers().setStatementHandler(getSH(false));
331: break;
332: case 'r':
333: options.setEmbedding(false);
334: break;
335: case 'R':
336: options.setEmbedding(true);
337: break;
338: case 'n':
339: numbers = true;
340: break;
341: case 'E':
342: arp.getHandlers().setErrorHandler(new ErrorHandler() {
343: public void warning(SAXParseException exception) { /* ignore */
344: }
345:
346: public void error(SAXParseException exception) { /* ignore */
347: }
348:
349: public void fatalError(SAXParseException exception) { /* ignore */
350: }
351: });
352:
353: arp.setBadStatementHandler(new SH(System.err));
354: break;
355: case 'b':
356: xmlBase = nextArg;
357: break;
358: case 'e':
359: setErrorMode(nextArg, EM_ERROR);
360: break;
361: case 'i':
362: setErrorMode(nextArg, EM_IGNORE);
363: break;
364: case 'w':
365: setErrorMode(nextArg, EM_WARNING);
366: break;
367: case 'f':
368: for (int j = 0; j < 400; j++) {
369: if (options.setErrorMode(j, -1) == EM_ERROR)
370: options.setErrorMode(j, EM_FATAL);
371: }
372: break;
373: case 'u':
374: options.setErrorMode(WARN_UNQUALIFIED_ATTRIBUTE,
375: EM_IGNORE);
376: options.setErrorMode(WARN_UNQUALIFIED_RDF_ATTRIBUTE,
377: EM_IGNORE);
378: break;
379: default:
380: usage();
381: }
382: }
383: return usedNext ? 1 : 0;
384: }
385:
386: static private void setErrorMode(String numbers, int mode) {
387: int n[] = new int[3];
388: int j = 0;
389: numbers += ",";
390: for (int i = 0; i < numbers.length(); i++) {
391: char c = numbers.charAt(i);
392: switch (c) {
393: case '0':
394: case '1':
395: case '2':
396: case '3':
397: case '4':
398: case '5':
399: case '6':
400: case '7':
401: case '8':
402: case '9':
403: if (j == 3)
404: usage();
405: n[j++] = c - '0';
406: break;
407: case ' ':
408: case ';':
409: case ',':
410: if (i == 0)
411: usage();
412: switch (j) {
413: case 0:
414: break;
415: case 3:
416: arp.getOptions().setErrorMode(
417: n[0] * 100 + n[1] * 10 + n[2], mode);
418: j = 0;
419: break;
420: default:
421: usage();
422: }
423: break;
424: default:
425: usage();
426: }
427: }
428: }
429:
430: static private void process(String surl) {
431: InputStream in;
432:
433: URL url;
434: String baseURL;
435:
436: try {
437: File ff = new File(surl);
438: in = new FileInputStream(ff);
439: url = ff.toURL();
440: baseURL = url.toExternalForm();
441: if (baseURL.startsWith("file:/")
442: && !baseURL.startsWith("file://")) {
443: baseURL = "file://" + baseURL.substring(5);
444: }
445: } catch (Exception ignore) {
446: try {
447: url = new URL(surl);
448: in = url.openStream();
449: baseURL = url.toExternalForm();
450: } catch (Exception e) {
451: System.err.println("ARP: Failed to open: " + surl);
452: System.err.println(" "
453: + ParseException.formatMessage(ignore));
454: System.err.println(" "
455: + ParseException.formatMessage(e));
456: return;
457: }
458: }
459: process(in, baseURL, surl);
460: }
461:
462: static private void process(InputStream in, String xmlBasex,
463: String surl) {
464: String xmlBasey = xmlBase == null ? xmlBasex : xmlBase;
465: try {
466: arp.load(in, xmlBasey);
467: } catch (IOException e) {
468: System.err.println("Error: " + surl + ": "
469: + ParseException.formatMessage(e));
470: } catch (SAXParseException e) {
471: // already reported.
472: } catch (SAXException sax) {
473: System.err.println("Error: " + surl + ": "
474: + ParseException.formatMessage(sax));
475: }
476: }
477:
478: private static class TwoSH implements StatementHandler {
479: final StatementHandler a, b;
480:
481: public void statement(AResource subj, AResource pred,
482: AResource obj) {
483: a.statement(subj, pred, obj);
484: b.statement(subj, pred, obj);
485: }
486:
487: public void statement(AResource subj, AResource pred,
488: ALiteral lit) {
489: a.statement(subj, pred, lit);
490: b.statement(subj, pred, lit);
491: }
492:
493: TwoSH(StatementHandler A, StatementHandler B) {
494: a = A;
495: b = B;
496: }
497: }
498:
499: private static class NoSH implements StatementHandler {
500: // private int ix = 0;
501: // private void userData(AResource n){
502: // if (n.isAnonymous()) {
503: //// n.setUserData(new Integer(ix++));
504: // }
505: // }
506: public void statement(AResource subj, AResource pred,
507: AResource obj) {
508: // userData(subj);
509: // userData(pred);
510: // userData(obj);
511: }
512:
513: public void statement(AResource subj, AResource pred,
514: ALiteral lit) {
515: // userData(subj);
516: // userData(pred);
517: }
518: }
519:
520: private static class SH implements StatementHandler {
521: PrintStream out;
522:
523: SH(PrintStream out) {
524: this .out = out;
525: }
526:
527: public void statement(AResource subj, AResource pred,
528: AResource obj) {
529: lineNumber();
530: resource(subj);
531: resource(pred);
532: resource(obj);
533: line.append('.');
534: out.println(line);
535: line.setLength(0);
536: }
537:
538: public void statement(AResource subj, AResource pred,
539: ALiteral lit) {
540: // String lang = lit.getLang();
541: // String parseType = lit.getParseType();
542: lineNumber();
543: /*
544: if (parseType != null) {
545: System.out.print("# ");
546: if (parseType != null)
547: System.out.print("'" + parseType + "'");
548: System.out.println();
549: }
550: */
551: resource(subj);
552: resource(pred);
553: literal(lit);
554: line.append('.');
555: out.println(line);
556: line.setLength(0);
557: }
558: }
559:
560: static private void print(String s) {
561: line.append(s);
562: }
563:
564: static private void resource(AResource r) {
565: if (r.isAnonymous()) {
566: print("_:j");
567: print(r.getAnonymousID());
568: print(" ");
569: } else {
570: print("<");
571: escapeURI(r.getURI());
572: print("> ");
573: }
574: }
575:
576: static private void escape(String s) {
577: int lg = s.length();
578: for (int i = 0; i < lg; i++) {
579: char ch = s.charAt(i);
580: switch (ch) {
581: case '\\':
582: print("\\\\");
583: break;
584: case '"':
585: print("\\\"");
586: break;
587: case '\n':
588: print("\\n");
589: break;
590: case '\r':
591: print("\\r");
592: break;
593: case '\t':
594: print("\\t");
595: break;
596: default:
597: if (ch >= 32 && ch <= 126)
598: line.append(ch);
599: else {
600: print("\\u");
601: String hexstr = Integer.toHexString(ch)
602: .toUpperCase();
603: int pad = 4 - hexstr.length();
604:
605: for (; pad > 0; pad--)
606: print("0");
607: print(hexstr);
608: }
609: }
610: }
611: }
612:
613: static private boolean okURIChars[] = new boolean[128];
614: static {
615: for (int i = 32; i < 127; i++)
616: okURIChars[i] = true;
617: okURIChars['<'] = false;
618: okURIChars['>'] = false;
619: okURIChars['\\'] = false;
620:
621: }
622:
623: static private void escapeURI(String s) {
624: int lg = s.length();
625: for (int i = 0; i < lg; i++) {
626: char ch = s.charAt(i);
627: if (ch < okURIChars.length && okURIChars[ch]) {
628: line.append(ch);
629: } else {
630: print("\\u");
631: String hexstr = Integer.toHexString(ch).toUpperCase();
632: int pad = 4 - hexstr.length();
633:
634: for (; pad > 0; pad--)
635: print("0");
636: print(hexstr);
637: }
638: }
639: }
640:
641: static private void literal(ALiteral l) {
642: //if (l.isWellFormedXML())
643: // System.out.print("xml");
644: line.append('"');
645: escape(l.toString());
646: line.append('"');
647: String lang = l.getLang();
648: if (lang != null && !lang.equals("")) {
649: line.append('@');
650: print(lang);
651: }
652: String dt = l.getDatatypeURI();
653: if (dt != null && !dt.equals("")) {
654: print("^^<");
655: escapeURI(dt);
656: line.append('>');
657: }
658:
659: line.append(' ');
660: }
661:
662: }
|