001: /*
002: * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP All rights
003: * reserved.
004: *
005: * (c) Copyright 2003, Plugged In Software
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions are met: 1.
009: * Redistributions of source code must retain the above copyright notice, this
010: * list of conditions and the following disclaimer. 2. Redistributions in
011: * binary form must reproduce the above copyright notice, this list of
012: * conditions and the following disclaimer in the documentation and/or other
013: * materials provided with the distribution. 3. The name of the author may not
014: * be used to endorse or promote products derived from this software without
015: * specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
018: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
019: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
020: * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
021: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
022: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
023: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
024: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
025: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
026: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * $Id: XMLHandler.java,v 1.30 2008/01/02 12:09:07 andy_seaborne Exp $
029: *
030: * AUTHOR: Jeremy J. Carroll
031: */
032: /*
033: * ARPFilter.java
034: *
035: * Created on June 21, 2001, 10:01 PM
036: */
037:
038: package com.hp.hpl.jena.rdf.arp.impl;
039:
040: import java.io.File;
041: import java.net.MalformedURLException;
042: import java.util.HashMap;
043: import java.util.Iterator;
044: import java.util.Map;
045:
046: import org.xml.sax.Attributes;
047: import org.xml.sax.Locator;
048: import org.xml.sax.SAXException;
049: import org.xml.sax.SAXParseException;
050:
051: import com.hp.hpl.jena.iri.IRI;
052: import com.hp.hpl.jena.iri.IRIFactory;
053: import com.hp.hpl.jena.rdf.arp.ALiteral;
054: import com.hp.hpl.jena.rdf.arp.ARPErrorNumbers;
055: import com.hp.hpl.jena.rdf.arp.ARPHandlers;
056: import com.hp.hpl.jena.rdf.arp.ARPOptions;
057: import com.hp.hpl.jena.rdf.arp.AResource;
058: import com.hp.hpl.jena.rdf.arp.ExtendedHandler;
059: import com.hp.hpl.jena.rdf.arp.FatalParsingErrorException;
060: import com.hp.hpl.jena.rdf.arp.ParseException;
061: import com.hp.hpl.jena.rdf.arp.StatementHandler;
062: import com.hp.hpl.jena.rdf.arp.states.Frame;
063: import com.hp.hpl.jena.rdf.arp.states.FrameI;
064: import com.hp.hpl.jena.rdf.arp.states.LookingForRDF;
065: import com.hp.hpl.jena.rdf.arp.states.StartStateRDForDescription;
066:
067: /**
068: * This class converts SAX events into a stream of encapsulated events suitable
069: * for the RDF parser. In effect, this is the RDF lexer. updates by kers to
070: * handle exporting namespace prefix maps.
071: *
072: * @author jjc
073: */
074: public class XMLHandler extends LexicalHandlerImpl implements
075: ARPErrorNumbers, Names {
076:
077: boolean encodingProblems = false;
078:
079: protected Map idsUsed = new HashMap();
080: protected int idsUsedCount = 0;
081:
082: public void triple(ANode s, ANode p, ANode o) {
083: StatementHandler stmt;
084: boolean bad = s.isTainted() || p.isTainted() || o.isTainted();
085: if (bad) {
086: stmt = badStatementHandler;
087: } else {
088: stmt = handlers.getStatementHandler();
089: }
090: AResourceInternal subj = (AResourceInternal) s;
091: AResourceInternal pred = (AResourceInternal) p;
092: if (!bad)
093: subj.setHasBeenUsed();
094: if (o instanceof AResource) {
095: AResourceInternal obj = (AResourceInternal) o;
096: if (!bad)
097: obj.setHasBeenUsed();
098: stmt.statement(subj, pred, obj);
099: } else
100: stmt.statement(subj, pred, (ALiteral) o);
101: }
102:
103: // This is the current frame.
104: FrameI frame;
105:
106: public void startPrefixMapping(String prefix, String uri)
107: throws SAXParseException {
108: checkNamespaceURI(uri);
109: handlers.getNamespaceHandler().startPrefixMapping(prefix, uri);
110: }
111:
112: public void endPrefixMapping(String prefix) {
113: handlers.getNamespaceHandler().endPrefixMapping(prefix);
114: }
115:
116: public Locator getLocator() {
117: return locator;
118: }
119:
120: Locator locator;
121:
122: public void setDocumentLocator(Locator locator) {
123: this .locator = locator;
124: }
125:
126: static final private boolean DEBUG = false;
127:
128: public void startElement(String uri, String localName,
129: String rawName, Attributes atts) throws SAXException {
130: if (Thread.interrupted())
131: warning(null, ERR_INTERRUPTED, "Interrupt detected.");
132: FrameI oldFrame = frame;
133: frame = frame.startElement(uri, localName, rawName, atts);
134: if (DEBUG)
135: System.err.println("<" + rawName + "> :: "
136: + getSimpleName(oldFrame.getClass()) + " --> "
137: + getSimpleName(frame.getClass()));
138: }
139:
140: public void endElement(String uri, String localName, String rawName)
141: throws SAXException {
142: frame.endElement();
143: frame = frame.getParent();
144: frame.afterChild();
145: if (DEBUG)
146: System.err.println("</" + rawName + "> :: <--"
147: + getSimpleName(frame.getClass()));
148: }
149:
150: static public String getSimpleName(Class c) {
151: String rslt[] = c.getName().split("\\.");
152: return rslt[rslt.length - 1];
153: }
154:
155: public void characters(char ch[], int start, int length)
156: throws SAXException {
157: frame.characters(ch, start, length);
158: }
159:
160: public void ignorableWhitespace(char ch[], int start, int length)
161: throws SAXException { // Never called.
162: characters(ch, start, length);
163: }
164:
165: void setUserData(String nodeId, Object v) {
166: nodeIdUserData.put(nodeId, v);
167: }
168:
169: Object getUserData(String nodeId) {
170: return nodeIdUserData.get(nodeId);
171: }
172:
173: public void comment(char[] ch, int start, int length)
174: throws SAXParseException {
175: frame.comment(ch, start, length);
176: }
177:
178: public void processingInstruction(String target, String data)
179: throws SAXException {
180: frame.processingInstruction(target, data);
181: }
182:
183: public void warning(Taint taintMe, int id, String msg)
184: throws SAXParseException {
185: if (options.getErrorMode(id) != EM_IGNORE)
186: warning(taintMe, id, location(), msg);
187: }
188:
189: void warning(Taint taintMe, int id, Location loc, String msg)
190: throws SAXParseException {
191: if (options.getErrorMode(id) != EM_IGNORE)
192: warning(taintMe, id, new ParseException(id, loc, msg) {
193: private static final long serialVersionUID = 1990910846204964756L;
194: });
195: }
196:
197: void generalError(int id, Exception e) throws SAXParseException {
198: Location where = new Location(locator);
199: // System.err.println(e.getMessage());
200: warning(null, id, new ParseException(id, where, e));
201:
202: }
203:
204: void warning(Taint taintMe, int id, SAXParseException e)
205: throws SAXParseException {
206: try {
207: switch (options.getErrorMode(id)) {
208: case EM_IGNORE:
209: break;
210: case EM_WARNING:
211: handlers.getErrorHandler().warning(e);
212: break;
213: case EM_ERROR:
214: if (taintMe != null)
215: taintMe.taint();
216: handlers.getErrorHandler().error(e);
217: break;
218: case EM_FATAL:
219: handlers.getErrorHandler().fatalError(e);
220: break;
221: }
222: } catch (SAXParseException xx) {
223: throw xx;
224: } catch (SAXException ee) {
225: throw new WrappedException(ee);
226: }
227: if (e instanceof ParseException
228: && ((ParseException) e).isPromoted())
229: throw e;
230: if (options.getErrorMode(id) == EM_FATAL) {
231: // If we get here, we shouldn't go on
232: // throw an error into Jena.
233: throw new FatalParsingErrorException();
234:
235: }
236: }
237:
238: public void error(SAXParseException e) throws SAXParseException {
239: warning(null, ERR_SAX_ERROR, e);
240: }
241:
242: public void warning(SAXParseException e) throws SAXParseException {
243: warning(null, WARN_SAX_WARNING, e);
244: }
245:
246: public void fatalError(SAXParseException e) throws SAXException {
247: warning(null, ERR_SAX_FATAL_ERROR, e);
248: // If we get here, we shouldn't go on
249: // throw an error into Jena.
250: throw new FatalParsingErrorException();
251:
252: }
253:
254: /**
255: * @param v
256: */
257: public void endLocalScope(ANode v) {
258: if (handlers.getExtendedHandler() != nullScopeHandler) {
259: ARPResource bn = (ARPResource) v;
260: if (!bn.getHasBeenUsed())
261: return;
262: if (bn.hasNodeID()) {
263: // save for later end scope
264: if (handlers.getExtendedHandler()
265: .discardNodesWithNodeID())
266: return;
267: String bnodeID = bn.nodeID;
268: if (!nodeIdUserData.containsKey(bnodeID))
269: nodeIdUserData.put(bnodeID, null);
270: } else {
271: handlers.getExtendedHandler().endBNodeScope(bn);
272: }
273: }
274: }
275:
276: public void endRDF() {
277: handlers.getExtendedHandler().endRDF();
278: }
279:
280: public void startRDF() {
281: handlers.getExtendedHandler().startRDF();
282: }
283:
284: boolean ignoring(int eCode) {
285: return options.getErrorMode(eCode) == EM_IGNORE;
286: }
287:
288: public boolean isError(int eCode) {
289: return options.getErrorMode(eCode) == EM_ERROR;
290: }
291:
292: protected AbsXMLContext initialContext(String base, String lang)
293: throws SAXParseException {
294: return initialContextWithBase(base).withLang(this , lang);
295: }
296:
297: private boolean allowRelativeReferences = false;
298:
299: private AbsXMLContext initialContextWithBase(String base)
300: throws SAXParseException {
301: allowRelativeReferences = false;
302: if (base == null) {
303: warning(null, IGN_NO_BASE_URI_SPECIFIED,
304: "Base URI not specified for input file; local URI references will be in error.");
305:
306: return new XMLBaselessContext(this ,
307: ERR_RESOLVING_URI_AGAINST_NULL_BASE);
308:
309: } else if (base.equals("")) {
310: allowRelativeReferences = true;
311: warning(null, IGN_NO_BASE_URI_SPECIFIED,
312: "Base URI specified as \"\"; local URI references will not be resolved.");
313: return new XMLBaselessContext(this ,
314: WARN_RESOLVING_URI_AGAINST_EMPTY_BASE);
315: } else {
316: // if (base.toLowerCase().startsWith("file:")
317: // && base.length()>5
318: // && base.charAt(5) != '/'
319: // ) {
320: // System.err.print(base);
321: // try {
322: // base = new File(base.substring(5)).toURL().toString();
323: // if (base.length()<=6
324: // || base.charAt(6)!= '/')
325: // base = "file://"+base.substring(5);
326: // } catch (MalformedURLException e) {
327: // // ignore, just leave it alone.
328: // }
329: // System.err.println(" ==> "+base);
330: //
331: // }
332: return new XMLBaselessContext(this ,
333: ERR_RESOLVING_AGAINST_RELATIVE_BASE).withBase(this ,
334: base);
335: }
336: }
337:
338: /*
339: private XMLContext initialContextWithBasex(String base)
340: throws SAXParseException {
341: XMLContext rslt = new XMLContext(this, base);
342: RDFURIReference b = rslt.getURI();
343: if (base == null) {
344: warning(null,IGN_NO_BASE_URI_SPECIFIED,
345: "Base URI not specified for input file; local URI references will be in error.");
346:
347: } else if (base.equals("")) {
348: warning(null,IGN_NO_BASE_URI_SPECIFIED,
349: "Base URI specified as \"\"; local URI references will not be resolved.");
350:
351: } else {
352: checkBadURI(null,b);
353: // Warnings on bad base.
354:
355: // if (b.isVeryBad()||b.isRelative()) {
356: // return
357: }
358:
359: return rslt;
360: }
361: */
362:
363: private ARPOptions options = new ARPOptions();
364:
365: private ARPHandlers handlers = new ARPHandlers();
366:
367: StatementHandler getStatementHandler() {
368: return handlers.getStatementHandler();
369: }
370:
371: public ARPHandlers getHandlers() {
372: return handlers;
373: }
374:
375: public ARPOptions getOptions() {
376: return options;
377: }
378:
379: public void setOptionsWith(ARPOptions newOpts) {
380: options = newOpts.copy();
381:
382: }
383:
384: public void setHandlersWith(ARPHandlers newHh) {
385: handlers = new ARPHandlers();
386: handlers.setErrorHandler(newHh.getErrorHandler());
387: handlers.setExtendedHandler(newHh.getExtendedHandler());
388: handlers.setNamespaceHandler(newHh.getNamespaceHandler());
389: handlers.setStatementHandler(newHh.getStatementHandler());
390:
391: }
392:
393: private Map nodeIdUserData;
394:
395: public void initParse(String base, String lang)
396: throws SAXParseException {
397: nodeIdUserData = new HashMap();
398: idsUsed = ignoring(WARN_REDEFINITION_OF_ID) ? null
399: : new HashMap();
400: idsUsedCount = 0;
401: if (options.getEmbedding())
402: frame = new LookingForRDF(this , initialContext(base, lang));
403: else
404: frame = new StartStateRDForDescription(this ,
405: initialContext(base, lang));
406:
407: }
408:
409: /**
410: * This method must be always be called after parsing, e.g. in a finally
411: * block.
412: *
413: */
414: void afterParse() {
415: while (frame != null) {
416: frame.abort();
417: frame = frame.getParent();
418: }
419: // endRDF();
420: endBnodeScope();
421: idsUsed = null;
422: }
423:
424: void endBnodeScope() {
425: if (handlers.getExtendedHandler() != nullScopeHandler) {
426: Iterator it = nodeIdUserData.keySet().iterator();
427: while (it.hasNext()) {
428: String nodeId = (String) it.next();
429: ARPResource bn = new ARPResource(this , nodeId);
430: handlers.getExtendedHandler().endBNodeScope(bn);
431: }
432: }
433: }
434:
435: public Location location() {
436: return new Location(locator);
437: }
438:
439: private IRIFactory factory = IRIFactory.jenaImplementation();
440:
441: IRIFactory iriFactory() {
442: if (factory == null) {
443:
444: // TODO locator stuff
445: // factory = new IRIFactory();
446: // factory.useSpecificationRDF(false);
447: /*
448: if (locator != null)
449: factory = new IRIFactory(locator);
450: else
451: factory = new IRIFactory(new Locator() {
452:
453: public int getColumnNumber() {
454: return locator == null ? -1 : locator.getColumnNumber();
455: }
456:
457: public int getLineNumber() {
458: return locator == null ? -1 : locator.getLineNumber();
459: }
460:
461: public String getPublicId() {
462: return locator == null ? null : locator.getPublicId();
463: }
464:
465: public String getSystemId() {
466: return locator == null ? null : locator.getSystemId();
467: }
468:
469: });
470: */
471: }
472: return factory;
473: }
474:
475: private void checkNamespaceURI(String uri) throws SAXParseException {
476: ((Frame) frame).checkEncoding(null, uri);
477: if (uri.length() != 0) {
478: IRI u = iriFactory().create(uri);
479: // if (u.isVeryBad()) {
480: // warning(null,
481: // WARN_BAD_NAMESPACE_URI,
482: // "The namespace URI: <"
483: // + uri
484: // + "> is not well formed.");
485: // return;
486: //
487: // }
488: if (!u.isAbsolute()) {
489: warning(
490: null,
491: WARN_RELATIVE_NAMESPACE_URI_DEPRECATED,
492: "The namespace URI: <"
493: + uri
494: + "> is relative. Such use has been deprecated by the W3C, and may result in RDF interoperability failures. Use an absolute namespace URI.");
495: }
496: try {
497: if (!u.toASCIIString().equals(u.toString()))
498: warning(
499: null,
500: WARN_BAD_NAMESPACE_URI,
501: "Non-ascii characters in a namespace URI may not be completely portable: <"
502: + u.toString()
503: + ">. Resulting RDF URI references are legal.");
504: } catch (MalformedURLException e) {
505: warning(null, WARN_BAD_NAMESPACE_URI,
506: "toAscii failed for namespace URI: <"
507: + u.toString() + ">. " + e.getMessage());
508: }
509:
510: if (uri.startsWith(rdfns) && !uri.equals(rdfns))
511: warning(null, WARN_BAD_RDF_NAMESPACE_URI,
512: "Namespace URI ref <" + uri
513: + "> may not be used in RDF/XML.");
514: if (uri.startsWith(xmlns) && !uri.equals(xmlns))
515: warning(null, WARN_BAD_XML_NAMESPACE_URI,
516: "Namespace URI ref <" + uri
517: + "> may not be used in RDF/XML.");
518: }
519: }
520:
521: public boolean allowRelativeURIs() {
522: return allowRelativeReferences;
523: }
524:
525: private IRI sameDocRef;
526:
527: public IRI sameDocRef() {
528: if (sameDocRef == null) {
529: sameDocRef = iriFactory().create("");
530: }
531: return sameDocRef;
532: }
533:
534: private StatementHandler badStatementHandler = nullStatementHandler;
535:
536: public void setBadStatementHandler(StatementHandler sh) {
537: badStatementHandler = sh;
538: }
539:
540: final public static StatementHandler nullStatementHandler = new StatementHandler() {
541: public void statement(AResource s, AResource p, AResource o) {
542: }
543:
544: public void statement(AResource s, AResource p, ALiteral o) {
545: }
546: };
547: final public static ExtendedHandler nullScopeHandler = new ExtendedHandler() {
548:
549: public void endBNodeScope(AResource bnode) {
550: }
551:
552: public void startRDF() {
553: }
554:
555: public void endRDF() {
556: }
557:
558: public boolean discardNodesWithNodeID() {
559: return true;
560: }
561: };
562: }
|