0001: /******************************************************************
0002: * File: Rule.java
0003: * Created by: Dave Reynolds
0004: * Created on: 29-Mar-03
0005: *
0006: * (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
0007: * [See end of file]
0008: * $Id: Rule.java,v 1.48 2008/01/02 12:07:47 andy_seaborne Exp $
0009: *****************************************************************/package com.hp.hpl.jena.reasoner.rulesys;
0010:
0011: import java.io.BufferedReader;
0012: import java.io.IOException;
0013: import java.util.*;
0014:
0015: import com.hp.hpl.jena.util.FileUtils;
0016: import com.hp.hpl.jena.util.PrintUtil;
0017: import com.hp.hpl.jena.util.Tokenizer;
0018: import com.hp.hpl.jena.graph.*;
0019: import com.hp.hpl.jena.reasoner.*;
0020: import com.hp.hpl.jena.shared.*;
0021: import com.hp.hpl.jena.datatypes.RDFDatatype;
0022: import com.hp.hpl.jena.datatypes.TypeMapper;
0023: import com.hp.hpl.jena.datatypes.xsd.*;
0024:
0025: import org.apache.commons.logging.Log;
0026: import org.apache.commons.logging.LogFactory;
0027:
0028: /**Representation of a generic inference rule.
0029: * <p>
0030: * This represents the rule specification but most engines will
0031: * compile this specification into an abstract machine or processing
0032: * graph. </p>
0033: * <p>
0034: * The rule specification comprises a list of antecendents (body) and a list
0035: * of consequents (head). If there is more than one consequent then a backchainer
0036: * should regard this as a shorthand for several rules, all with the
0037: * same body but with a singleton head. </p>
0038: * <p>
0039: * Each element in the head or body can be a TriplePattern, a Functor or a Rule.
0040: * A TriplePattern is just a triple of Nodes but the Nodes can represent
0041: * variables, wildcards and embedded functors - as well as constant uri or
0042: * literal graph nodes. A functor comprises a functor name and a list of
0043: * arguments. The arguments are Nodes of any type except functor nodes
0044: * (there is no functor nesting). The functor name can be mapped into a registered
0045: * java class that implements its semantics. Functors play three roles -
0046: * in heads they represent actions (procedural attachement); in bodies they
0047: * represent builtin predicates; in TriplePatterns they represent embedded
0048: * structured literals that are used to cache matched subgraphs such as
0049: * restriction specifications. </p>
0050: * <p>
0051: * The equality contract for rules is that two rules are equal if each of terms
0052: * (ClauseEntry objects) are equals and they have the same name, if any.
0053: * </p>
0054: * We include a trivial, recursive descent parser but this is just there
0055: * to allow rules to be embedded in code. External rule syntax based on N3
0056: * and RDF could be developed. The embedded syntax supports rules such as:
0057: * <blockindent>
0058: * <code>[ (?C rdf:type *), guard(?C, ?P) -> (?c rb:restriction some(?P, ?D)) ].</code><br />
0059: * <code>[ (?s owl:foo ?p) -> [ (?s owl:bar ?a) -> (?s ?p ?a) ] ].</code><br />
0060: * <code>[name: (?s owl:foo ?p) -> (?s ?p ?a)].</code><br />
0061: * </blockindent>
0062: * only built in namespaces are recognized as such, * is a wildcard node, ?c is a variable,
0063: * name(node ... node) is a functor, (node node node) is a triple pattern, [..] is an
0064: * embedded rule, commas are ignore and can be freely used as separators. Functor names
0065: * may not end in ':'.
0066: * </p>
0067: * @author <a href="mailto:der@hplb.hpl.hp.com">Dave Reynolds</a> * @version $Revision: 1.48 $ on $Date: 2008/01/02 12:07:47 $
0068: */
0069: public class Rule implements ClauseEntry {
0070:
0071: //=======================================================================
0072: // variables
0073:
0074: /** Rule body */
0075: protected ClauseEntry[] body;
0076:
0077: /** Rule head or set of heads */
0078: protected ClauseEntry[] head;
0079:
0080: /** Optional name for the rule */
0081: protected String name;
0082:
0083: /** The number of distinct variables used in the rule */
0084: protected int numVars = -1;
0085:
0086: /** Flags whether the rule was written as a forward or backward rule */
0087: protected boolean isBackward = false;
0088:
0089: /** Flags whether the rule is monotonic */
0090: protected boolean isMonotonic = true;
0091:
0092: static Log logger = LogFactory.getLog(Rule.class);
0093:
0094: /**
0095: * Constructor
0096: * @param body a list of TriplePatterns or Functors.
0097: * @param head a list of TriplePatterns, Functors or rules
0098: */
0099: public Rule(List head, List body) {
0100: this (null, head, body);
0101: }
0102:
0103: /**
0104: * Constructor
0105: * @param name a label for rule
0106: * @param body a list of TriplePatterns or Functors.
0107: * @param head a list of TriplePatterns, Functors or rules
0108: */
0109: public Rule(String name, List head, List body) {
0110: this (name, (ClauseEntry[]) head.toArray(new ClauseEntry[head
0111: .size()]), (ClauseEntry[]) body
0112: .toArray(new ClauseEntry[body.size()]));
0113: }
0114:
0115: /**
0116: * Constructor
0117: * @param name a label for rule
0118: * @param body an array of TriplePatterns or Functors.
0119: * @param head an array of TriplePatterns, Functors or rules
0120: */
0121: public Rule(String name, ClauseEntry[] head, ClauseEntry[] body) {
0122: this .name = name;
0123: this .head = head;
0124: this .body = body;
0125: this .isMonotonic = allMonotonic(head);
0126: }
0127:
0128: // Compute the monotonicity flag
0129: // Future support for negation would affect this
0130: private boolean allMonotonic(ClauseEntry[] elts) {
0131: for (int i = 0; i < elts.length; i++) {
0132: ClauseEntry elt = elts[i];
0133: if (elt instanceof Functor) {
0134: Builtin b = ((Functor) elt).getImplementor();
0135: if (b != null) {
0136: if (!b.isMonotonic())
0137: return false;
0138: } else {
0139: throw new ReasonerException("Undefined Functor "
0140: + ((Functor) elt).getName() + " in "
0141: + toShortString());
0142: }
0143: }
0144: }
0145: return true;
0146: }
0147:
0148: //=======================================================================
0149: // accessors
0150:
0151: /**
0152: * Return the number of body elements
0153: */
0154: public int bodyLength() {
0155: return body.length;
0156: }
0157:
0158: /**
0159: * Return the n'th body element
0160: */
0161: public ClauseEntry getBodyElement(int n) {
0162: return body[n];
0163: }
0164:
0165: /**
0166: * return the entire rule body as an array of objects
0167: */
0168: public ClauseEntry[] getBody() {
0169: return body;
0170: }
0171:
0172: /**
0173: * Return the number of head elements
0174: */
0175: public int headLength() {
0176: return head.length;
0177: }
0178:
0179: /**
0180: * Return the n'th head element
0181: */
0182: public ClauseEntry getHeadElement(int n) {
0183: return head[n];
0184: }
0185:
0186: /**
0187: * return the entire rule head as an array of objects
0188: */
0189: public ClauseEntry[] getHead() {
0190: return head;
0191: }
0192:
0193: /**
0194: * Return true if the rule was written as a backward (as opposed to forward) rule.
0195: */
0196: public boolean isBackward() {
0197: return isBackward;
0198: }
0199:
0200: /**
0201: * Set the rule to be run backwards.
0202: * @param flag if true the rule should run backwards.
0203: */
0204: public void setBackward(boolean flag) {
0205: isBackward = flag;
0206: }
0207:
0208: /**
0209: * Get the name for the rule - can be null.
0210: */
0211: public String getName() {
0212: return name;
0213: }
0214:
0215: /**
0216: * Set the number of distinct variables for this rule.
0217: * Used internally when cloing rules, not normally required.
0218: */
0219: public void setNumVars(int n) {
0220: numVars = n;
0221: }
0222:
0223: /**
0224: * Return the number of distinct variables in the rule. Or more precisely, the
0225: * size of a binding environment needed to represent the rule.
0226: */
0227: public int getNumVars() {
0228: if (numVars == -1) {
0229: // only have to do this if the rule was generated programatically
0230: // the parser will have prefilled this in for normal rules
0231: int max = findVars(body, -1);
0232: max = findVars(head, max);
0233: numVars = max + 1;
0234: }
0235: return numVars;
0236: }
0237:
0238: /**
0239: * Find all the variables in a clause array.
0240: */
0241: private int findVars(Object[] nodes, int maxIn) {
0242: int max = maxIn;
0243: for (int i = 0; i < nodes.length; i++) {
0244: Object node = nodes[i];
0245: if (node instanceof TriplePattern) {
0246: max = findVars((TriplePattern) node, max);
0247: } else {
0248: max = findVars((Functor) node, max);
0249: }
0250: }
0251: return max;
0252: }
0253:
0254: /**
0255: * Find all the variables in a TriplePattern.
0256: */
0257: private int findVars(TriplePattern t, int maxIn) {
0258: int max = maxIn;
0259: max = maxVarIndex(t.getSubject(), max);
0260: max = maxVarIndex(t.getPredicate(), max);
0261: Node obj = t.getObject();
0262: if (obj instanceof Node_RuleVariable) {
0263: max = maxVarIndex(obj, max);
0264: } else if (Functor.isFunctor(obj)) {
0265: max = findVars((Functor) obj.getLiteralValue(), max);
0266: }
0267: return max;
0268: }
0269:
0270: /**
0271: * Find all the variables in a Functor.
0272: */
0273: private int findVars(Functor f, int maxIn) {
0274: int max = maxIn;
0275: Node[] args = f.getArgs();
0276: for (int i = 0; i < args.length; i++) {
0277: if (args[i].isVariable())
0278: max = maxVarIndex(args[i], max);
0279: }
0280: return max;
0281: }
0282:
0283: /**
0284: * Return the maximum node index of the variable and the max so far.
0285: */
0286: private int maxVarIndex(Node var, int max) {
0287: if (var instanceof Node_RuleVariable) {
0288: int index = ((Node_RuleVariable) var).index;
0289: if (index > max)
0290: return index;
0291: }
0292: return max;
0293: }
0294:
0295: /**
0296: * Instantiate a rule given a variable binding environment.
0297: * This will clone any non-bound variables though that is only needed
0298: * for trail implementations.
0299: */
0300: public Rule instantiate(BindingEnvironment env) {
0301: HashMap vmap = new HashMap();
0302: return new Rule(name, cloneClauseArray(head, vmap, env),
0303: cloneClauseArray(body, vmap, env));
0304: }
0305:
0306: /**
0307: * Clone a rule, cloning any embedded variables.
0308: */
0309: public Rule cloneRule() {
0310: if (getNumVars() > 0) {
0311: HashMap vmap = new HashMap();
0312: return new Rule(name, cloneClauseArray(head, vmap, null),
0313: cloneClauseArray(body, vmap, null));
0314: } else {
0315: return this ;
0316: }
0317: }
0318:
0319: /**
0320: * Clone a clause array.
0321: */
0322: private ClauseEntry[] cloneClauseArray(ClauseEntry[] clauses,
0323: Map vmap, BindingEnvironment env) {
0324: ClauseEntry[] cClauses = new ClauseEntry[clauses.length];
0325: for (int i = 0; i < clauses.length; i++) {
0326: cClauses[i] = cloneClause(clauses[i], vmap, env);
0327: }
0328: return cClauses;
0329: }
0330:
0331: /**
0332: * Clone a clause, cloning any embedded variables.
0333: */
0334: private ClauseEntry cloneClause(ClauseEntry clause, Map vmap,
0335: BindingEnvironment env) {
0336: if (clause instanceof TriplePattern) {
0337: TriplePattern tp = (TriplePattern) clause;
0338: return new TriplePattern(cloneNode(tp.getSubject(), vmap,
0339: env), cloneNode(tp.getPredicate(), vmap, env),
0340: cloneNode(tp.getObject(), vmap, env));
0341: } else {
0342: return cloneFunctor((Functor) clause, vmap, env);
0343: }
0344: }
0345:
0346: /**
0347: * Clone a functor, cloning any embedded variables.
0348: */
0349: private Functor cloneFunctor(Functor f, Map vmap,
0350: BindingEnvironment env) {
0351: Node[] args = f.getArgs();
0352: Node[] cargs = new Node[args.length];
0353: for (int i = 0; i < args.length; i++) {
0354: cargs[i] = cloneNode(args[i], vmap, env);
0355: }
0356: Functor fn = new Functor(f.getName(), cargs);
0357: fn.setImplementor(f.getImplementor());
0358: return fn;
0359: }
0360:
0361: /**
0362: * Close a single node.
0363: */
0364: private Node cloneNode(Node nIn, Map vmap, BindingEnvironment env) {
0365: Node n = (env == null) ? nIn : env.getGroundVersion(nIn);
0366: if (n instanceof Node_RuleVariable) {
0367: Node_RuleVariable nv = (Node_RuleVariable) n;
0368: Node c = (Node) vmap.get(nv);
0369: if (c == null) {
0370: c = ((Node_RuleVariable) n).cloneNode();
0371: vmap.put(nv, c);
0372: }
0373: return c;
0374: } else if (Functor.isFunctor(n)) {
0375: Functor f = (Functor) n.getLiteralValue();
0376: return Functor.makeFunctorNode(cloneFunctor(f, vmap, env));
0377: } else {
0378: return n;
0379: }
0380: }
0381:
0382: /**
0383: * Returns false for rules which can affect other rules non-monotonically (remove builtin
0384: * or similar) or are affected non-monotonically (involve negation-as-failure).
0385: */
0386: public boolean isMonotonic() {
0387: return isMonotonic;
0388: }
0389:
0390: /**
0391: * Returns true if the rule does not depend on any data, and so should
0392: * be treated as an axiom.
0393: */
0394: public boolean isAxiom() {
0395: if (isBackward() && body.length > 0)
0396: return false;
0397: for (int i = 0; i < body.length; i++) {
0398: if (body[i] instanceof TriplePattern) {
0399: return false;
0400: }
0401: }
0402: return true;
0403: }
0404:
0405: /**
0406: * Printable string describing the rule
0407: */
0408: public String toString() {
0409: StringBuffer buff = new StringBuffer();
0410: buff.append("[ ");
0411: if (name != null) {
0412: buff.append(name);
0413: buff.append(": ");
0414: }
0415: if (isBackward) {
0416: for (int i = 0; i < head.length; i++) {
0417: buff.append(PrintUtil.print(head[i]));
0418: buff.append(" ");
0419: }
0420: buff.append("<- ");
0421: for (int i = 0; i < body.length; i++) {
0422: buff.append(PrintUtil.print(body[i]));
0423: buff.append(" ");
0424: }
0425: } else {
0426: for (int i = 0; i < body.length; i++) {
0427: buff.append(PrintUtil.print(body[i]));
0428: buff.append(" ");
0429: }
0430: buff.append("-> ");
0431: for (int i = 0; i < head.length; i++) {
0432: buff.append(PrintUtil.print(head[i]));
0433: buff.append(" ");
0434: }
0435: }
0436: buff.append("]");
0437: return buff.toString();
0438: }
0439:
0440: /**
0441: * Print a short description of the rule, just its name if it
0442: * has one, otherwise the whole rule description.
0443: */
0444: public String toShortString() {
0445: if (name != null) {
0446: return name;
0447: } else {
0448: return toString();
0449: }
0450: }
0451:
0452: //=======================================================================
0453: // parser access
0454:
0455: /**
0456: * Parse a string as a rule.
0457: * @throws ParserException if there is a problem
0458: */
0459: public static Rule parseRule(String source) throws ParserException {
0460: Parser parser = new Parser(source);
0461: return parser.parseRule();
0462: }
0463:
0464: /**
0465: * Answer the list of rules parsed from the given URL.
0466: * @throws RulesetNotFoundException
0467: */
0468: public static List rulesFromURL(String uri) {
0469: try {
0470: BufferedReader br = FileUtils.readerFromURL(uri);
0471: return parseRules(Rule.rulesParserFromReader(br));
0472: } catch (WrappedIOException e) {
0473: throw new RulesetNotFoundException(uri);
0474: }
0475: }
0476:
0477: /**
0478: Answer a String which is the concatenation (with newline glue) of all the
0479: non-comment lines readable from <code>src</code>. A comment line is
0480: one starting "#" or "//".
0481: @deprecated Use rulesParserFromReader
0482: */
0483: public static String rulesStringFromReader(BufferedReader src) {
0484: try {
0485: StringBuffer result = new StringBuffer();
0486: String line;
0487: while ((line = src.readLine()) != null) {
0488: if (line.startsWith("#") || line.startsWith("//"))
0489: continue; // Skip comment lines
0490: result.append(line);
0491: result.append("\n");
0492: }
0493: return result.toString();
0494: } catch (IOException e) {
0495: throw new WrappedIOException(e);
0496: }
0497: }
0498:
0499: /**
0500: * Processes the source reader stripping off comment lines and noting prefix
0501: * definitions (@prefix) and rule inclusion commands (@include).
0502: * Returns a parser which is bound to the stripped source text with
0503: * associated prefix and rule inclusion definitions.
0504: */
0505: public static Parser rulesParserFromReader(BufferedReader src) {
0506: try {
0507: StringBuffer result = new StringBuffer();
0508: String line;
0509: Map prefixes = new HashMap();
0510: List preloadedRules = new ArrayList();
0511: while ((line = src.readLine()) != null) {
0512: if (line.startsWith("#"))
0513: continue; // Skip comment lines
0514: line = line.trim();
0515: if (line.startsWith("//"))
0516: continue; // Skip comment lines
0517: if (line.startsWith("@prefix")) {
0518: line = line.substring("@prefix".length());
0519: String prefix = nextArg(line);
0520: String rest = nextAfterArg(line);
0521: if (prefix.endsWith(":"))
0522: prefix = prefix.substring(0,
0523: prefix.length() - 1);
0524: String url = extractURI(rest);
0525: prefixes.put(prefix, url);
0526:
0527: } else if (line.startsWith("@include")) {
0528: // Include referenced rule file, either URL or local special case
0529: line = line.substring("@include".length());
0530: String url = extractURI(line);
0531: // Check for predefined cases
0532: if (url.equalsIgnoreCase("rdfs")) {
0533: preloadedRules.addAll(RDFSFBRuleReasoner
0534: .loadRules());
0535:
0536: } else if (url.equalsIgnoreCase("owl")) {
0537: preloadedRules.addAll(OWLFBRuleReasoner
0538: .loadRules());
0539:
0540: } else if (url.equalsIgnoreCase("owlmicro")) {
0541: preloadedRules.addAll(OWLMicroReasoner
0542: .loadRules());
0543:
0544: } else if (url.equalsIgnoreCase("owlmini")) {
0545: preloadedRules.addAll(OWLMiniReasoner
0546: .loadRules());
0547:
0548: } else {
0549: // Just try loading as a URL
0550: preloadedRules.addAll(rulesFromURL(url));
0551: }
0552:
0553: } else {
0554: result.append(line);
0555: result.append("\n");
0556: }
0557: }
0558: Parser parser = new Parser(result.toString());
0559: parser.registerPrefixMap(prefixes);
0560: parser.addRulesPreload(preloadedRules);
0561: return parser;
0562: } catch (IOException e) {
0563: throw new WrappedIOException(e);
0564: }
0565: }
0566:
0567: /**
0568: * Helper function find a URI argument in the current string,
0569: * optionally surrounded by matching <>.
0570: */
0571: private static String extractURI(String lineSoFar) {
0572: String token = lineSoFar.trim();
0573: if (token.startsWith("<")) {
0574: int split = token.indexOf('>');
0575: token = token.substring(1, split);
0576: }
0577: return token;
0578: }
0579:
0580: /**
0581: * Helper function to return the next whitespace delimited argument
0582: * from the string
0583: */
0584: private static String nextArg(String token) {
0585: int start = nextSplit(0, false, token);
0586: int stop = nextSplit(start, true, token);
0587: return token.substring(start, stop);
0588: }
0589:
0590: /**
0591: * Helper function to return the remainder of the line after
0592: * stripping off the next whitespace delimited argument
0593: * from the string
0594: */
0595: private static String nextAfterArg(String token) {
0596: int start = nextSplit(0, false, token);
0597: int stop = nextSplit(start, true, token);
0598: int rest = nextSplit(stop, false, token);
0599: return token.substring(rest);
0600: }
0601:
0602: /**
0603: * Helper function - find index of next whitespace or non white
0604: * after the start index.
0605: */
0606: private static int nextSplit(int start, boolean white, String line) {
0607: int i = start;
0608: while (i < line.length()) {
0609: boolean isWhite = Character.isWhitespace(line.charAt(i));
0610: if ((white & isWhite) || (!white & !isWhite)) {
0611: return i;
0612: }
0613: i++;
0614: }
0615: return i;
0616: }
0617:
0618: public static void main(String[] args) {
0619: String test = " <http://myuri/fool>.";
0620: String arg = nextArg(test);
0621: String rest = nextAfterArg(test);
0622: String uri = extractURI(rest);
0623: System.out.println("ARG = [" + arg + "], URI = [" + uri + "]");
0624: }
0625:
0626: /**
0627: * Run a pre-bound rule parser to extract it's rules
0628: * @return a list of rules
0629: * @throws ParserException if there is a problem
0630: */
0631: public static List parseRules(Parser parser) throws ParserException {
0632: boolean finished = false;
0633: List ruleset = new ArrayList();
0634: ruleset.addAll(parser.getRulesPreload());
0635: while (!finished) {
0636: try {
0637: parser.peekToken();
0638: } catch (NoSuchElementException e) {
0639: finished = true;
0640: break;
0641: }
0642: Rule rule = parser.parseRule();
0643: ruleset.add(rule);
0644: }
0645: return ruleset;
0646: }
0647:
0648: /**
0649: * Parse a string as a list a rules.
0650: * @return a list of rules
0651: * @throws ParserException if there is a problem
0652: */
0653: public static List parseRules(String source) throws ParserException {
0654: return parseRules(new Parser(source));
0655: }
0656:
0657: //=======================================================================
0658: // parser support
0659:
0660: /**
0661: * Inner class which provides minimalist parsing support based on
0662: * tokenisation with depth 1 lookahead. No sensible error reporting on offer.
0663: * No embedded spaces supported.
0664: */
0665: public static class Parser {
0666:
0667: /** Tokenizer */
0668: private Tokenizer stream;
0669:
0670: /** Look ahead, null if none */
0671: private String lookahead;
0672:
0673: // Literal parse state flags
0674: private static final int NORMAL = 0;
0675: private static final int STARTED_LITERAL = 1;
0676:
0677: /** Literal parse state */
0678: private int literalState = NORMAL;;
0679:
0680: /** Trace back of recent tokens for error reporting */
0681: protected List priorTokens = new ArrayList();
0682:
0683: /** Maximum number of recent tokens to remember */
0684: private static final int maxPriors = 20;
0685:
0686: /** Variable table */
0687: private Map varMap;
0688:
0689: /** Local prefix map */
0690: private PrefixMapping prefixMapping = PrefixMapping.Factory
0691: .create();
0692:
0693: /** Pre-included rules */
0694: private List preloadedRules = new ArrayList();
0695:
0696: /**
0697: * Constructor
0698: * @param source the string to be parsed
0699: */
0700: Parser(String source) {
0701: stream = new Tokenizer(source, "()[], \t\n\r", "'\"", true);
0702: lookahead = null;
0703: }
0704:
0705: /**
0706: * Register a new namespace prefix with the parser
0707: */
0708: public void registerPrefix(String prefix, String namespace) {
0709: prefixMapping.setNsPrefix(prefix, namespace);
0710: }
0711:
0712: /**
0713: * Register a set of prefix to namespace mappings with the parser
0714: */
0715: public void registerPrefixMap(Map map) {
0716: prefixMapping.setNsPrefixes(map);
0717: }
0718:
0719: /**
0720: * Return a map of all the discovered prefixes
0721: */
0722: public Map getPrefixMap() {
0723: return prefixMapping.getNsPrefixMap();
0724: }
0725:
0726: /**
0727: * Add a new set of preloaded rules.
0728: */
0729: void addRulesPreload(List rules) {
0730: preloadedRules.addAll(rules);
0731: }
0732:
0733: /**
0734: * Return the complete set of preloaded rules;
0735: */
0736: public List getRulesPreload() {
0737: return preloadedRules;
0738: }
0739:
0740: /**
0741: * Return the next token
0742: */
0743: String nextToken() {
0744: if (lookahead != null) {
0745: String temp = lookahead;
0746: lookahead = null;
0747: return temp;
0748: } else {
0749: String token = stream.nextToken();
0750: if (literalState == NORMAL) {
0751: // Skip separators unless within a literal
0752: while (isSeparator(token)) {
0753: token = stream.nextToken();
0754: }
0755: }
0756: if (token.equals("'")) {
0757: if (literalState == NORMAL) {
0758: literalState = STARTED_LITERAL;
0759: } else {
0760: literalState = NORMAL;
0761: }
0762: }
0763: priorTokens.add(0, token);
0764: if (priorTokens.size() > maxPriors) {
0765: priorTokens.remove(priorTokens.size() - 1);
0766: }
0767: return token;
0768: }
0769: }
0770:
0771: /**
0772: * Return a trace of the recently seen tokens, for use
0773: * in error reporting
0774: */
0775: public String recentTokens() {
0776: StringBuffer trace = new StringBuffer();
0777: for (int i = priorTokens.size() - 1; i >= 0; i--) {
0778: trace.append(priorTokens.get(i));
0779: trace.append(" ");
0780: }
0781: return trace.toString();
0782: }
0783:
0784: /**
0785: * Peek ahead one token.
0786: */
0787: String peekToken() {
0788: if (lookahead == null) {
0789: lookahead = nextToken();
0790: }
0791: return lookahead;
0792: }
0793:
0794: /**
0795: * Push back a previously fetched token. Only depth 1 supported.
0796: */
0797: void pushback(String token) {
0798: lookahead = token;
0799: }
0800:
0801: /**
0802: * Returns true if token is an skippable separator
0803: */
0804: boolean isSeparator(String token) {
0805: if (token.length() == 1) {
0806: char c = token.charAt(0);
0807: return (c == ',' || Character.isWhitespace(c));
0808: }
0809: return false;
0810: }
0811:
0812: /**
0813: * Returns true if token is a syntax element ()[]
0814: */
0815: boolean isSyntax(String token) {
0816: if (token.length() == 1) {
0817: char c = token.charAt(0);
0818: return (c == '(' || c == ')' || c == '[' || c == ']');
0819: }
0820: return false;
0821: }
0822:
0823: /**
0824: * Find the variable index for the given variable name
0825: * and return a Node_RuleVariable with that index.
0826: */
0827: Node_RuleVariable getNodeVar(String name) {
0828: Node_RuleVariable node = (Node_RuleVariable) varMap
0829: .get(name);
0830: if (node == null) {
0831: node = new Node_RuleVariable(name, varMap.size());
0832: varMap.put(name, node);
0833: }
0834: return node;
0835: }
0836:
0837: /**
0838: * Translate a token to a node.
0839: */
0840: Node parseNode(String token) {
0841: if (token.startsWith("?")) {
0842: return getNodeVar(token);
0843: // Dropped support for anon wildcards until the implementation is better resolved
0844: } else if (token.equals("*") || token.equals("_")) {
0845: throw new ParserException(
0846: "Wildcard variables no longer supported", this );
0847: //// return Node_RuleVariable.ANY;
0848: // return Node_RuleVariable.WILD;
0849: } else if (token.startsWith("<") && token.endsWith(">")) {
0850: String uri = token.substring(1, token.length() - 1);
0851: return Node.createURI(uri);
0852: } else if (token.indexOf(':') != -1) {
0853: String exp = prefixMapping.expandPrefix(token); // Local map first
0854: exp = PrintUtil.expandQname(exp); // Retain global map for backward compatibility
0855: if (exp == token) {
0856: // No expansion was possible
0857: String prefix = token.substring(0, token
0858: .indexOf(':'));
0859: if (prefix.equals("http") || prefix.equals("urn")
0860: || prefix.equals("file")
0861: || prefix.equals("ftp")
0862: || prefix.equals("mailto")) {
0863: // assume it is all OK and fall through
0864: } else {
0865: // Likely to be a typo in a qname or failure to register
0866: throw new ParserException(
0867: "Unrecognized qname prefix (" + prefix
0868: + ") in rule", this );
0869: }
0870: }
0871: return Node.createURI(exp);
0872: } else if (peekToken().equals("(")) {
0873: Functor f = new Functor(token, parseNodeList(),
0874: BuiltinRegistry.theRegistry);
0875: return Functor.makeFunctorNode(f);
0876: } else if (token.equals("'") || token.equals("\"")) {
0877: // A plain literal
0878: String lit = nextToken();
0879: // Skip the trailing quote
0880: nextToken();
0881: // Check for an explicit datatype
0882: if (peekToken().startsWith("^^")) {
0883: String dtURI = nextToken().substring(2);
0884: if (dtURI.indexOf(':') != -1) {
0885: // Thanks to Steve Cranefield for pointing out the need for prefix expansion here
0886: String exp = prefixMapping.expandPrefix(dtURI); // Local map first
0887: exp = PrintUtil.expandQname(exp); // Retain global map for backward compatibility
0888: if (exp == dtURI) {
0889: // No expansion was possible
0890: String prefix = dtURI.substring(0, dtURI
0891: .indexOf(':'));
0892: if (prefix.equals("http")
0893: || prefix.equals("urn")
0894: || prefix.equals("ftp")
0895: || prefix.equals("mailto")) {
0896: // assume it is all OK and fall through
0897: } else {
0898: // Likely to be a typo in a qname or failure to register
0899: throw new ParserException(
0900: "Unrecognized qname prefix ("
0901: + prefix + ") in rule",
0902: this );
0903: }
0904: } else {
0905: dtURI = exp;
0906: }
0907: }
0908: RDFDatatype dt = TypeMapper.getInstance()
0909: .getSafeTypeByName(dtURI);
0910: return Node.createLiteral(lit, "", dt);
0911: } else {
0912: return Node.createLiteral(lit, "", false);
0913: }
0914: } else if (Character.isDigit(token.charAt(0))
0915: || (token.charAt(0) == '-' && token.length() > 1 && Character
0916: .isDigit(token.charAt(1)))) {
0917: // A number literal
0918: return parseNumber(token);
0919: } else {
0920: // A uri
0921: return Node.createURI(token);
0922: }
0923: }
0924:
0925: /**
0926: * Turn a possible numeric token into typed literal else a plain literal
0927: * @return the constructed literal node
0928: */
0929: Node parseNumber(String lit) {
0930: if (Character.isDigit(lit.charAt(0))
0931: || (lit.charAt(0) == '-' && lit.length() > 1 && Character
0932: .isDigit(lit.charAt(1)))) {
0933: if (lit.indexOf(".") != -1) {
0934: // Float?
0935: if (XSDDatatype.XSDfloat.isValid(lit)) {
0936: return Node.createLiteral(lit, "",
0937: XSDDatatype.XSDfloat);
0938: }
0939: } else {
0940: // Int?
0941: if (XSDDatatype.XSDint.isValid(lit)) {
0942: return Node.createLiteral(lit, "",
0943: XSDDatatype.XSDint);
0944: }
0945: }
0946: }
0947: // Default is a plain literal
0948: return Node.createLiteral(lit, "", false);
0949: }
0950:
0951: /**
0952: * Parse a list of nodes delimited by parentheses
0953: */
0954: List parseNodeList() {
0955: String token = nextToken();
0956: if (!token.equals("(")) {
0957: throw new ParserException(
0958: "Expected '(' at start of clause, found "
0959: + token, this );
0960: }
0961: token = nextToken();
0962: List nodeList = new ArrayList();
0963: while (!isSyntax(token)) {
0964: nodeList.add(parseNode(token));
0965: token = nextToken();
0966: }
0967: if (!token.equals(")")) {
0968: throw new ParserException(
0969: "Expected ')' at end of clause, found " + token,
0970: this );
0971: }
0972: return nodeList;
0973: }
0974:
0975: /**
0976: * Parse a clause, could be a triple pattern, a rule or a functor
0977: */
0978: Object parseClause() {
0979: String token = peekToken();
0980: if (token.equals("(")) {
0981: List nodes = parseNodeList();
0982: if (nodes.size() != 3) {
0983: throw new ParserException("Triple with "
0984: + nodes.size() + " nodes!", this );
0985: }
0986: if (Functor.isFunctor((Node) nodes.get(0))) {
0987: throw new ParserException(
0988: "Functors not allowed in subject position of pattern",
0989: this );
0990: }
0991: if (Functor.isFunctor((Node) nodes.get(1))) {
0992: throw new ParserException(
0993: "Functors not allowed in predicate position of pattern",
0994: this );
0995: }
0996: return new TriplePattern((Node) nodes.get(0),
0997: (Node) nodes.get(1), (Node) nodes.get(2));
0998: } else if (token.equals("[")) {
0999: nextToken();
1000: return doParseRule(true);
1001: } else {
1002: String name = nextToken();
1003: List args = parseNodeList();
1004: Functor clause = new Functor(name, args,
1005: BuiltinRegistry.theRegistry);
1006: if (clause.getImplementor() == null) {
1007: // Not a fatal error becase later processing can add this
1008: // implementation to the registry
1009: logger
1010: .warn("Rule references unimplemented functor: "
1011: + name);
1012: }
1013: return clause;
1014: }
1015: }
1016:
1017: /**
1018: * Parse a rule, terminated by a "]" or "." character.
1019: */
1020: public Rule parseRule() {
1021: return doParseRule(false);
1022: }
1023:
1024: /**
1025: * Parse a rule, terminated by a "]" or "." character.
1026: * @param retainVarMap set to true to ccause the existing varMap to be left in place, which
1027: * is required for nested rules.
1028: */
1029: private Rule doParseRule(boolean retainVarMap) {
1030: try {
1031: // Skip initial '[' if present
1032: if (peekToken().equals("[")) {
1033: nextToken();
1034: }
1035: // Check for optional name
1036: String name = null;
1037: String token = peekToken();
1038: if (token.endsWith(":")) {
1039: name = token.substring(0, token.length() - 1);
1040: nextToken();
1041: }
1042: // Start rule parsing with empty variable table
1043: if (!retainVarMap)
1044: varMap = new HashMap();
1045: // Body
1046: List body = new ArrayList();
1047: token = peekToken();
1048: while (!(token.equals("->") || token.equals("<-"))) {
1049: body.add(parseClause());
1050: token = peekToken();
1051: }
1052: boolean backwardRule = token.equals("<-");
1053: List head = new ArrayList();
1054: token = nextToken(); // skip -> token
1055: token = peekToken();
1056: while (!(token.equals(".") || token.equals("]"))) {
1057: head.add(parseClause());
1058: token = peekToken();
1059: }
1060: nextToken(); // consume the terminating token
1061: Rule r = null;
1062: if (backwardRule) {
1063: r = new Rule(name, body, head);
1064: } else {
1065: r = new Rule(name, head, body);
1066: }
1067: r.numVars = varMap.keySet().size();
1068: r.isBackward = backwardRule;
1069: return r;
1070: } catch (NoSuchElementException e) {
1071: throw new ParserException("Malformed rule", this );
1072: }
1073: }
1074:
1075: }
1076:
1077: /** Equality override */
1078: public boolean equals(Object o) {
1079: // Pass 1 - just check basic shape
1080: if (!(o instanceof Rule))
1081: return false;
1082: Rule other = (Rule) o;
1083: if (other.head.length != head.length)
1084: return false;
1085: if (other.body.length != body.length)
1086: return false;
1087: // Pass 2 - check clause by clause matching
1088: for (int i = 0; i < body.length; i++) {
1089: if (!((ClauseEntry) body[i])
1090: .sameAs((ClauseEntry) other.body[i]))
1091: return false;
1092: }
1093: for (int i = 0; i < head.length; i++) {
1094: if (!((ClauseEntry) head[i])
1095: .sameAs((ClauseEntry) other.head[i]))
1096: return false;
1097: }
1098: // Also include the rule name in the equality contract
1099: if (name != null) {
1100: if (!name.equals(other.name))
1101: return false;
1102: } else {
1103: if (other.name != null)
1104: return false;
1105: }
1106: return true;
1107: }
1108:
1109: /** hash function override */
1110: public int hashCode() {
1111: int hash = 0;
1112: for (int i = 0; i < body.length; i++) {
1113: hash = (hash << 1) ^ body[i].hashCode();
1114: }
1115: for (int i = 0; i < head.length; i++) {
1116: hash = (hash << 1) ^ head[i].hashCode();
1117: }
1118: return hash;
1119: }
1120:
1121: /**
1122: * Compare clause entries, taking into account variable indices.
1123: * The equality function ignores differences between variables.
1124: */
1125: public boolean sameAs(Object o) {
1126: return equals(o);
1127: }
1128:
1129: //=======================================================================
1130: // Other supporting inner classes
1131:
1132: /**
1133: * Inner class. Exception raised if there is a problem
1134: * during rule parsing.
1135: */
1136: public static class ParserException extends JenaException {
1137:
1138: /** constructor */
1139: public ParserException(String message, Parser parser) {
1140: super (constructMessage(message, parser));
1141: }
1142:
1143: /**
1144: * Extract context trace from prior tokens stack
1145: */
1146: private static String constructMessage(String baseMessage,
1147: Parser parser) {
1148: StringBuffer message = new StringBuffer();
1149: message.append(baseMessage);
1150: message.append("\nAt '");
1151: message.append(parser.recentTokens());
1152: message.append("'");
1153: return message.toString();
1154: }
1155:
1156: }
1157:
1158: }
1159:
1160: /*
1161: (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
1162: All rights reserved.
1163:
1164: Redistribution and use in source and binary forms, with or without
1165: modification, are permitted provided that the following conditions
1166: are met:
1167:
1168: 1. Redistributions of source code must retain the above copyright
1169: notice, this list of conditions and the following disclaimer.
1170:
1171: 2. Redistributions in binary form must reproduce the above copyright
1172: notice, this list of conditions and the following disclaimer in the
1173: documentation and/or other materials provided with the distribution.
1174:
1175: 3. The name of the author may not be used to endorse or promote products
1176: derived from this software without specific prior written permission.
1177:
1178: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1179: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1180: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
1181: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
1182: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1183: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1184: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1185: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1186: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
1187: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1188: */
|