001: /*
002: * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions
007: * are met:
008: * 1. Redistributions of source code must retain the above copyright
009: * notice, this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright
011: * notice, this list of conditions and the following disclaimer in the
012: * documentation and/or other materials provided with the distribution.
013: * 3. The name of the author may not be used to endorse or promote products
014: * derived from this software without specific prior written permission.
015:
016: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
017: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
018: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
019: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
020: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
021: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
022: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
023: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
024: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
025: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
026:
027: * * $Id: ParserSupport.java,v 1.16 2008/01/02 12:09:04 andy_seaborne Exp $
028:
029: AUTHOR: Jeremy J. Carroll
030: */
031: /*
032: * S.java
033: *
034: * Created on July 15, 2001, 7:13 AM
035: */
036:
037: package com.hp.hpl.jena.rdf.arp.impl;
038:
039: import java.util.HashMap;
040: import java.util.Map;
041:
042: import org.apache.xerces.util.XMLChar;
043: import org.xml.sax.SAXParseException;
044:
045: import com.hp.hpl.jena.iri.IRI;
046: import com.hp.hpl.jena.rdf.arp.ARPErrorNumbers;
047: import com.hp.hpl.jena.rdf.arp.lang.LanguageTagCodes;
048:
049: /**
050: *
051: * @author jjc
052: *
053: */
054: public class ParserSupport implements ARPErrorNumbers,
055: LanguageTagCodes, Names {
056:
057: // protected void checkBadURI(Taint taintMe,RDFURIReference uri) throws SAXParseException {
058: // arp.checkBadURI(taintMe,uri);
059: // }
060:
061: protected ParserSupport(XMLHandler arp, AbsXMLContext xml) {
062: this .arp = arp;
063: this .xml = xml;
064: }
065:
066: Map idsUsed() {
067: return arp.idsUsed;
068: }
069:
070: protected final XMLHandler arp;
071: public final AbsXMLContext xml;
072:
073: /**
074: * @param str The fully expanded URI
075: */
076: protected void checkIdSymbol(Taint taintMe, AbsXMLContext ctxt,
077: String str) throws SAXParseException {
078: if (arp.idsUsed != null) {
079: IRI uri = ctxt.uri;
080: Map idsUsedForBase = (Map) idsUsed().get(uri);
081: if (idsUsedForBase == null) {
082: idsUsedForBase = new HashMap();
083: idsUsed().put(uri, idsUsedForBase);
084: }
085: Location prev = (Location) idsUsedForBase.get(str);
086: if (prev != null) {
087: arp.warning(taintMe, WARN_REDEFINITION_OF_ID,
088: "Redefinition of ID: " + str);
089: arp.warning(taintMe, WARN_REDEFINITION_OF_ID, prev,
090: "Previous definition of '" + str + "'.");
091: } else {
092: idsUsedForBase.put(str, arp.location());
093: arp.idsUsedCount++;
094: if (arp.idsUsedCount > 10000) {
095: arp.idsUsed = null;
096: arp
097: .warning(taintMe, WARN_BIG_FILE,
098: "Input is large. Switching off checking for illegal reuse of rdf:ID's.");
099: }
100: }
101: }
102:
103: checkXMLName(taintMe, str);
104: checkEncoding(taintMe, str);
105: }
106:
107: protected void checkXMLName(Taint taintMe, String str)
108: throws SAXParseException {
109: if (!XMLChar.isValidNCName(str)) {
110: // System.err.println("not name (id): " + str);
111: warning(taintMe, WARN_BAD_NAME, "Not an XML Name: '" + str
112: + "'");
113: }
114:
115: }
116:
117: // protected void checkNodeID(Taint taintMe, String str) throws SAXParseException {
118: // if (!XMLChar.isValidNCName(str)) {
119: // warning(taintMe,
120: // WARN_BAD_NAME,
121: // "Not an XML Name: '" + str + "'");
122: // }
123: // }
124: public void checkString(Taint taintMe, String t)
125: throws SAXParseException {
126: if (!CharacterModel.isNormalFormC(t))
127: warning(taintMe, WARN_STRING_NOT_NORMAL_FORM_C,
128: "String not in Unicode Normal Form C: \"" + t
129: + "\"");
130: checkEncoding(taintMe, t);
131: checkComposingChar(taintMe, t);
132: }
133:
134: void checkComposingChar(Taint taintMe, String t)
135: throws SAXParseException {
136: if (CharacterModel.startsWithComposingCharacter(t))
137: warning(taintMe, WARN_STRING_COMPOSING_CHAR,
138: "String is not legal in XML 1.1; starts with composing char: \""
139: + t + "\" (" + ((int) t.charAt(0)) + ")");
140: }
141:
142: public void checkComposingChar(Taint taintMe, char ch[], int st,
143: int ln) throws SAXParseException {
144: if (ln > 0 && CharacterModel.isComposingChar(ch[st]))
145: warning(taintMe, WARN_STRING_COMPOSING_CHAR,
146: "String is not legal in XML 1.1; starts with composing char: \""
147: + new String(ch, st, ln) + "\" ("
148: + (int) ch[st] + ")");
149: }
150:
151: // public void checkXMLLang(Taint taintMe, String lang) throws SAXParseException {
152: // if (lang.equals(""))
153: // return;
154: // try {
155: // LanguageTag tag = new LanguageTag(lang);
156: // int tagType = tag.tagType();
157: // if (tagType == LT_ILLEGAL) {
158: // warning(taintMe,
159: // WARN_BAD_XMLLANG,
160: // tag.errorMessage());
161: // }
162: // if ((tagType & LT_UNDETERMINED) == LT_UNDETERMINED) {
163: // warning(taintMe,
164: // WARN_BAD_XMLLANG,
165: // "Unnecessary use of language tag \"und\" prohibited by RFC3066");
166: // }
167: // if ((tagType & LT_IANA_DEPRECATED) == LT_IANA_DEPRECATED) {
168: // warning(taintMe,
169: // WARN_DEPRECATED_XMLLANG,
170: // "Use of deprecated language tag \"" + lang + "\".");
171: // }
172: // if ((tagType & LT_PRIVATE_USE) == LT_PRIVATE_USE) {
173: // warning(taintMe,
174: // IGN_PRIVATE_XMLLANG,
175: // "Use of (IANA) private language tag \"" + lang + "\".");
176: // } else if ((tagType & LT_LOCAL_USE) == LT_LOCAL_USE) {
177: // warning(taintMe,
178: // IGN_PRIVATE_XMLLANG,
179: // "Use of (ISO639-2) local use language tag \""
180: // + lang
181: // + "\".");
182: // } else if ((tagType & LT_EXTRA) == LT_EXTRA) {
183: // warning(taintMe,
184: // IGN_PRIVATE_XMLLANG,
185: // "Use of additional private subtags on language \""
186: // + lang
187: // + "\".");
188: // }
189: // } catch (LanguageTagSyntaxException e) {
190: // warning(taintMe,
191: // WARN_MALFORMED_XMLLANG,
192: // e.getMessage());
193: // }
194: // }
195:
196: public void checkEncoding(Taint taintMe, String s)
197: throws SAXParseException {
198: if (arp.encodingProblems) {
199: for (int i = s.length() - 1; i >= 0; i--) {
200: if (s.charAt(i) < 0 || s.charAt(i) > 127) {
201: warning(taintMe, ERR_ENCODING_MISMATCH,
202: "Encoding error with non-ascii characters.");
203: break;
204: }
205: }
206: }
207: }
208:
209: /**
210: * whether this is a warning or an error is determined later.
211: * @param i
212: * @param msg
213: */
214: protected void warning(Taint taintMe, int i, String msg)
215: throws SAXParseException {
216: arp.warning(taintMe, i, msg);
217: }
218:
219: protected boolean isWhite(char ch[], int st, int ln) {
220: for (int i = 0; i < ln; i++)
221: if (!isWhite(ch[st + i]))
222: return false;
223: return true;
224: }
225:
226: protected boolean isWhite(StringBuffer buf) {
227: for (int i = buf.length() - 1; i >= 0; i--)
228: if (!isWhite(buf.charAt(i)))
229: return false;
230: return true;
231: }
232:
233: private boolean isWhite(char c) {
234: switch (c) {
235: case '\n':
236: case '\r':
237: case '\t':
238: case ' ':
239: return true;
240: default:
241: return false;
242: }
243: }
244:
245: protected void triple(ANode a, ANode b, ANode c) {
246: arp.triple(a, b, c);
247: }
248:
249: public AbsXMLContext getXMLContext() {
250: return xml;
251: }
252:
253: public XMLHandler getXMLHandler() {
254: return arp;
255: }
256:
257: protected String resolve(Taint taintMe, AbsXMLContext x, String uri)
258: throws SAXParseException {
259: IRI ref = x.resolveAsURI(arp, taintMe, uri);
260: // checkBadURI(taintMe,ref);
261: return ref.toString();
262: }
263:
264: }
|