001: /*
002: * (c) Copyright 2007, 2008 Hewlett-Packard Development Company, LP
003: * All rights reserved.
004: * [See end of file]
005: */
006:
007: package com.hp.hpl.jena.n3.turtle;
008:
009: import com.hp.hpl.jena.datatypes.RDFDatatype;
010: import com.hp.hpl.jena.datatypes.TypeMapper;
011: import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
012: import com.hp.hpl.jena.graph.Node;
013: import com.hp.hpl.jena.graph.Triple;
014: import com.hp.hpl.jena.n3.IRIResolver;
015: import com.hp.hpl.jena.n3.JenaURIException;
016: import com.hp.hpl.jena.rdf.model.AnonId;
017: import com.hp.hpl.jena.shared.PrefixMapping;
018: import com.hp.hpl.jena.shared.impl.PrefixMappingImpl;
019: import com.hp.hpl.jena.vocabulary.RDF;
020:
021: public class ParserBase {
022: // Should be the same as ARQ ParserBase and Prologues.
023: protected final Node XSD_TRUE = Node.createLiteral("true", null,
024: XSDDatatype.XSDboolean);
025: protected final Node XSD_FALSE = Node.createLiteral("false", null,
026: XSDDatatype.XSDboolean);
027:
028: protected final Node nRDFtype = RDF.type.asNode();
029:
030: protected final Node nRDFnil = RDF.nil.asNode();
031: protected final Node nRDFfirst = RDF.first.asNode();
032: protected final Node nRDFrest = RDF.rest.asNode();
033:
034: protected final Node nRDFsubject = RDF.subject.asNode();
035: protected final Node nRDFpredicate = RDF.predicate.asNode();
036: protected final Node nRDFobject = RDF.object.asNode();
037:
038: protected final String SWAP_NS = "http://www.w3.org/2000/10/swap/";
039: protected final String SWAP_LOG_NS = "http://www.w3.org/2000/10/swap/log#";
040: protected final Node nLogImplies = Node.createURI(SWAP_LOG_NS
041: + "implies");
042:
043: protected boolean strictTurtle = true;
044: protected boolean skolomizedBNodes = true;
045:
046: public ParserBase() {
047: }
048:
049: PrefixMapping prefixMapping = new PrefixMappingImpl();
050: IRIResolver resolver = new IRIResolver();
051:
052: protected String getBaseURI() {
053: return resolver.getBaseIRI();
054: }
055:
056: public void setBaseURI(String u) {
057: resolver = new IRIResolver(u);
058: }
059:
060: protected void setBase(String iriStr, int line, int column) {
061: // Already resolved.
062: setBaseURI(iriStr);
063: }
064:
065: public PrefixMapping getPrefixMapping() {
066: return prefixMapping;
067: }
068:
069: // label => bNode for construct templates patterns
070: LabelToNodeMap bNodeLabels = new LabelToNodeMap();
071:
072: TurtleEventHandler handler = null;
073:
074: public void setEventHandler(TurtleEventHandler h) {
075: handler = h;
076: }
077:
078: protected void emitTriple(int line, int col, Triple triple) {
079: handler.triple(line, col, triple);
080: }
081:
082: protected void startFormula(int line, int col) {
083: handler.startFormula(line, col);
084: }
085:
086: protected void endFormula(int line, int col) {
087: handler.endFormula(line, col);
088: }
089:
090: protected void setPrefix(int line, int col, String prefix,
091: String uri) {
092: prefixMapping.setNsPrefix(prefix, uri);
093: handler.prefix(line, col, prefix, uri);
094: }
095:
096: protected int makePositiveInteger(String lexicalForm) {
097: if (lexicalForm == null)
098: return -1;
099:
100: return Integer.parseInt(lexicalForm);
101: }
102:
103: protected Node createLiteralInteger(String lexicalForm) {
104: return Node.createLiteral(lexicalForm, null,
105: XSDDatatype.XSDinteger);
106: }
107:
108: protected Node createLiteralDouble(String lexicalForm) {
109: return Node.createLiteral(lexicalForm, null,
110: XSDDatatype.XSDdouble);
111: }
112:
113: protected Node createLiteralDecimal(String lexicalForm) {
114: return Node.createLiteral(lexicalForm, null,
115: XSDDatatype.XSDdecimal);
116: }
117:
118: protected Node createLiteral(String lexicalForm, String langTag,
119: Node datatype) {
120: String uri = (datatype == null) ? null : datatype.getURI();
121: return createLiteral(lexicalForm, langTag, uri);
122: }
123:
124: protected Node createLiteral(String lexicalForm, String langTag,
125: String datatypeURI) {
126: Node n = null;
127: // Can't have type and lang tag.
128: if (datatypeURI != null) {
129: RDFDatatype dType = TypeMapper.getInstance()
130: .getSafeTypeByName(datatypeURI);
131: n = Node.createLiteral(lexicalForm, null, dType);
132: } else
133: n = Node.createLiteral(lexicalForm, langTag, null);
134: return n;
135: }
136:
137: protected long integerValue(String s) {
138: if (s.startsWith("+"))
139: s = s.substring(1);
140: if (s.startsWith("0x")) {
141: // Hex
142: s = s.substring(2);
143: return Long.parseLong(s, 16);
144: }
145: return Long.parseLong(s);
146: }
147:
148: protected double doubleValue(String s) {
149: if (s.startsWith("+"))
150: s = s.substring(1);
151: double valDouble = Double.parseDouble(s);
152: return valDouble;
153: }
154:
155: protected String stripQuotes(String s) {
156: return s.substring(1, s.length() - 1);
157: }
158:
159: protected String stripQuotes3(String s) {
160: return s.substring(3, s.length() - 3);
161: }
162:
163: protected String stripChars(String s, int n) {
164: return s.substring(n, s.length());
165: }
166:
167: protected String resolveQuotedIRI(String iriStr, int line,
168: int column) {
169: iriStr = stripQuotes(iriStr);
170: return resolveIRI(iriStr, line, column);
171: }
172:
173: protected String resolveIRI(String iriStr, int line, int column) {
174: if (isBNodeIRI(iriStr))
175: return iriStr;
176:
177: if (resolver != null)
178: iriStr = _resolveIRI(iriStr, line, column);
179: return iriStr;
180: }
181:
182: private String _resolveIRI(String iriStr, int line, int column) {
183: try {
184: iriStr = resolver.resolve(iriStr);
185: } catch (JenaURIException ex) {
186: throwParseException(ex.getMessage(), line, column);
187: }
188: return iriStr;
189: }
190:
191: protected String resolvePName(String qname, int line, int column) {
192: String s = prefixMapping.expandPrefix(qname);
193: if (s == null || s.equals(qname))
194: throwParseException("Unresolved prefixed name: " + qname,
195: line, column);
196: return s;
197: }
198:
199: final static String bNodeLabelStart = "_:";
200:
201: protected Node createListNode() {
202: return createBNode();
203: }
204:
205: // Unlabelled bNode.
206: protected Node createBNode() {
207: return bNodeLabels.allocNode();
208: }
209:
210: // Labelled bNode.
211: protected Node createBNode(String label, int line, int column) {
212: return bNodeLabels.asNode(label);
213: }
214:
215: protected Node createVariable(String s, int line, int column) {
216: s = s.substring(1); // Drop the marker
217: return Node.createVariable(s);
218: }
219:
220: protected Node createNode(String iri) {
221: // Is it a bNode label? i.e. <_:xyz>
222: if (isBNodeIRI(iri)) {
223: String s = iri.substring(bNodeLabelStart.length());
224: Node n = Node.createAnon(new AnonId(s));
225: return n;
226: }
227: return Node.createURI(iri);
228: }
229:
230: protected boolean isBNodeIRI(String iri) {
231: return skolomizedBNodes && iri.startsWith(bNodeLabelStart);
232: }
233:
234: // protected Node createNodeFromURI(String s, int line, int column)
235: // {
236: // s = stripQuotes(s) ;
237: // String uriStr = s ; // Mutated
238: //
239: // try {
240: // uriStr = resolver.resolve(uriStr) ;
241: // } catch (JenaURIException ex)
242: // {
243: // throw new TurtleParseException(exMsg(ex.getMessage(), line, column)) ;
244: // }
245: // return Node.createURI(uriStr) ;
246: // }
247:
248: protected void throwParseException(String s, int line, int column) {
249: throw new TurtleParseException(exMsg(s, line, column));
250: }
251:
252: protected String fixupPrefix(String prefix, int line, int column) {
253: if (prefix.endsWith(":"))
254: prefix = prefix.substring(0, prefix.length() - 1);
255: return prefix;
256: }
257:
258: // Utilities to remove escapes
259:
260: // Testing interface
261: public static String unescapeStr(String s) {
262: return unescape(s, '\\', false, 1, 1);
263: }
264:
265: // public static String unescapeCodePoint(String s)
266: // { return unescape(s, '\\', true, 1, 1) ; }
267: //
268: // protected String unescapeCodePoint(String s, int line, int column)
269: // { return unescape(s, '\\', true, line, column) ; }
270:
271: protected String unescapeStr(String s, int line, int column) {
272: return unescape(s, '\\', false, line, column);
273: }
274:
275: // Worker function
276: private static String unescape(String s, char escape,
277: boolean pointCodeOnly, int line, int column) {
278: int i = s.indexOf(escape);
279:
280: if (i == -1)
281: return s;
282:
283: // Dump the initial part straight into the string buffer
284: StringBuffer sb = new StringBuffer(s.substring(0, i));
285: int len = s.length();
286: for (; i < len; i++) {
287: char ch = s.charAt(i);
288: // Keep line and column numbers.
289: switch (ch) {
290: case '\n':
291: case '\r':
292: line++;
293: column = 1;
294: break;
295: default:
296: column++;
297: break;
298: }
299:
300: if (ch != escape) {
301: sb.append(ch);
302: continue;
303: }
304:
305: // Escape
306: if (i >= len - 1)
307: throw new TurtleParseException(
308: exMsg("Illegal escape at end of string", line,
309: column));
310: char ch2 = s.charAt(i + 1);
311: column = column + 1;
312: i = i + 1;
313:
314: // \\u and \\U
315:if (ch2 == 'u') {
316: // i points to the \ so i+6 is next character
317: if (i + 4 >= len)
318: throw new TurtleParseException(exMsg(
319: "\\u escape too short", line, column));
320: int x = hex(s, i + 1, 4, line, column);
321: sb.append((char) x);
322: // Jump 1 2 3 4 -- already skipped \ and u
323: i = i + 4;
324: column = column + 4;
325: continue;
326: }
327: if (ch2 == 'U') {
328: // i points to the \ so i+6 is next character
329: if (i + 8 >= len)
330: throw new TurtleParseException(exMsg(
331: "\\U escape too short", line, column));
332: int x = hex(s, i + 1, 8, line, column);
333: sb.append((char) x);
334: // Jump 1 2 3 4 5 6 7 8 -- already skipped \ and u
335: i = i + 8;
336: column = column + 8;
337: continue;
338: }
339:
340: // Are we doing just point code escapes?
341: // If so, \X-anything else is legal as a literal "\" and "X"
342:
343: if (pointCodeOnly) {
344: sb.append('\\');
345: sb.append(ch2);
346: i = i + 1;
347: continue;
348: }
349:
350: // Not just codepoints. Must be a legal escape.
351: char ch3 = 0;
352: switch (ch2) {
353: case 'n':
354: ch3 = '\n';
355: break;
356: case 't':
357: ch3 = '\t';
358: break;
359: case 'r':
360: ch3 = '\r';
361: break;
362: case 'b':
363: ch3 = '\b';
364: break;
365: case 'f':
366: ch3 = '\f';
367: break;
368: case '\'':
369: ch3 = '\'';
370: break;
371: case '\"':
372: ch3 = '\"';
373: break;
374: case '\\':
375: ch3 = '\\';
376: break;
377: default:
378: throw new TurtleParseException(exMsg(
379: "Unknown escape: \\" + ch2, line, column));
380: }
381: sb.append(ch3);
382: }
383: return sb.toString();
384: }
385:
386: // Line and column that started the escape
387: static private int hex(String s, int i, int len, int line,
388: int column) {
389: // if ( i+len >= s.length() )
390: // {
391: //
392: // }
393: int x = 0;
394: for (int j = i; j < i + len; j++) {
395: char ch = s.charAt(j);
396: column++;
397: int k = 0;
398: switch (ch) {
399: case '0':
400: k = 0;
401: break;
402: case '1':
403: k = 1;
404: break;
405: case '2':
406: k = 2;
407: break;
408: case '3':
409: k = 3;
410: break;
411: case '4':
412: k = 4;
413: break;
414: case '5':
415: k = 5;
416: break;
417: case '6':
418: k = 6;
419: break;
420: case '7':
421: k = 7;
422: break;
423: case '8':
424: k = 8;
425: break;
426: case '9':
427: k = 9;
428: break;
429: case 'A':
430: case 'a':
431: k = 10;
432: break;
433: case 'B':
434: case 'b':
435: k = 11;
436: break;
437: case 'C':
438: case 'c':
439: k = 12;
440: break;
441: case 'D':
442: case 'd':
443: k = 13;
444: break;
445: case 'E':
446: case 'e':
447: k = 14;
448: break;
449: case 'F':
450: case 'f':
451: k = 15;
452: break;
453: default:
454: throw new TurtleParseException(exMsg(
455: "Illegal hex escape: " + ch, line, column));
456: }
457: x = (x << 4) + k;
458: }
459: return x;
460: }
461:
462: protected static String exMsg(String msg, int line, int column) {
463: return "Line " + line + ", column " + column + ": " + msg;
464: }
465: }
466:
467: /*
468: * (c) Copyright 2007, 2008 Hewlett-Packard Development Company, LP
469: * All rights reserved.
470: *
471: * Redistribution and use in source and binary forms, with or without
472: * modification, are permitted provided that the following conditions
473: * are met:
474: * 1. Redistributions of source code must retain the above copyright
475: * notice, this list of conditions and the following disclaimer.
476: * 2. Redistributions in binary form must reproduce the above copyright
477: * notice, this list of conditions and the following disclaimer in the
478: * documentation and/or other materials provided with the distribution.
479: * 3. The name of the author may not be used to endorse or promote products
480: * derived from this software without specific prior written permission.
481: *
482: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
483: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
484: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
485: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
486: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
487: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
488: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
489: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
490: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
491: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
492: */
|