001: /*
002: (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: [See end of file]
004: $Id: LiteralLabel.java,v 1.33 2008/01/02 12:05:20 andy_seaborne Exp $
005: */
006:
007: package com.hp.hpl.jena.graph.impl;
008:
009: import com.hp.hpl.jena.datatypes.*;
010: import com.hp.hpl.jena.datatypes.xsd.*;
011: import com.hp.hpl.jena.datatypes.xsd.impl.*;
012: import com.hp.hpl.jena.shared.impl.JenaParameters;
013:
014: /**
015: * Represents the "contents" of a Node_Literal.
016: * These contents comprise a lexical form, an optional language tag,
017: * and optional datatype structure and a value.
018: *
019: * @author Jeremy Carroll and Dave Reynolds
020: */
021: final public class LiteralLabel {
022:
023: //=======================================================================
024: // Variables
025:
026: public static LiteralLabel createLiteralLabel(String lex,
027: String lang, RDFDatatype dtype)
028: throws DatatypeFormatException {
029: return new LiteralLabel(lex, lang, dtype);
030: }
031:
032: /**
033: * The lexical form of the literal, may be null if the literal was
034: * created programatically and has not yet been serialized
035: */
036: private String lexicalForm;
037:
038: /**
039: * The value form of the literal. It will be null only if the value
040: * has not been parsed or if it is an illegal value.
041: * For plain literals and xsd:string literals
042: * the value is the same as the lexicalForm.
043: */
044: private Object value;
045:
046: /**
047: * The type of the literal. A null type indicates a classic "plain" literal.
048: * The type of a literal is fixed when it is created.
049: */
050: private RDFDatatype dtype;
051:
052: /**
053: * The xml:lang tag. For xsd literals this is ignored and not part of
054: * equality. For plain literals it is not ignored. The lang of a
055: * literal is fixed when it is created.
056: */
057: final private String lang;
058:
059: /**
060: * Indicates whether this is a legal literal. The working groups requires
061: * ill-formed literals to be treated as syntactically correct so instead
062: * of only storing well-formed literals we hack around it this way.
063: * N.B. This applies to any literal, not just XML well-formed literals.
064: */
065: private boolean wellformed = true;
066:
067: //=======================================================================
068: // Constructors
069:
070: /**
071: * Build a typed literal label from its lexical form. The
072: * lexical form will be parsed now and the value stored. If
073: * the form is not legal this will throw an exception.
074: *
075: * @param lex the lexical form of the literal
076: * @param lang the optional language tag, only relevant for plain literals
077: * @param dtype the type of the literal, null for old style "plain" literals
078: * @throws DatatypeFormatException if lex is not a legal form of dtype
079: */
080: private LiteralLabel(String lex, String lang, RDFDatatype dtype)
081: throws DatatypeFormatException {
082: lexicalForm = lex;
083: this .dtype = dtype;
084: this .lang = (lang == null ? "" : lang);
085: if (dtype == null) {
086: value = lex;
087: } else {
088: setValue(lex);
089: }
090: normalize();
091: }
092:
093: /**
094: * Build a plain literal label from its lexical form.
095: * @param lex the lexical form of the literal
096: * @param lang the optional language tag, only relevant for plain literals
097: */
098: public LiteralLabel(String lex, String lang) {
099: this (lex, lang, null);
100: }
101:
102: /**
103: * Build a typed literal label from its value form. If the value is a string we
104: * assume this is inteded to be a lexical form after all.
105: *
106: * @param value the value of the literal
107: * @param lang the optional language tag, only relevant for plain literals
108: * @param dtype the type of the literal, null for old style "plain" literals
109: */
110: public LiteralLabel(Object value, String lang, RDFDatatype dtype)
111: throws DatatypeFormatException {
112: this .dtype = dtype;
113: this .lang = (lang == null ? "" : lang);
114: if (value instanceof String) {
115: String lex = (String) value;
116: lexicalForm = lex;
117: if (dtype == null) {
118: this .value = lex;
119: } else {
120: setValue(lex);
121: }
122: } else {
123: this .value = (dtype == null) ? value : dtype
124: .cannonicalise(value);
125: }
126:
127: normalize();
128:
129: if (dtype != null && lexicalForm == null) {
130: // We are creating a literal from a java object, check the lexical form of the object is acceptable
131: // Done here and uses this.dtype so it can use the normalized type
132: wellformed = this .dtype.isValidValue(value);
133: if (JenaParameters.enableEagerLiteralValidation
134: && !wellformed) {
135: throw new DatatypeFormatException(value.toString(),
136: dtype, "in literal creation");
137: }
138: }
139: }
140:
141: /**
142: * Build a typed literal label supplying both value and lexical form.
143: * The caller guarantees that the lexical form is legal,
144: * and the value corresponds.
145: *
146: * @param lex the lexical form of the literal
147: * @param value the value of the literal
148: * @param lang the optional language tag, only relevant for plain literals
149: * @param dtype the type of the literal, null for old style "plain" literals
150: */
151: public LiteralLabel(String lex, Object value, String lang,
152: RDFDatatype dtype) {
153: this (value, lang, dtype);
154: this .lexicalForm = lex;
155: }
156:
157: /**
158: * Build a typed literal label from its value form using
159: * whatever datatype is currently registered as the the default
160: * representation for this java class. No language tag is supplied.
161: * @param value the literal value to encapsulate
162: */
163: public LiteralLabel(Object value) {
164: this (value, "", TypeMapper.getInstance().getTypeByValue(value));
165: }
166:
167: /**
168: * Old style constructor. Creates either a plain literal or an
169: * XMLLiteral.
170: * @param xml If true then s is exclusive canonical XML of type rdf:XMLLiteral, and no checking will be invoked.
171:
172: */
173: public LiteralLabel(String s, String lg, boolean xml) {
174: this .lexicalForm = s;
175: this .lang = (lg == null ? "" : lg);
176: if (xml) {
177: // XML Literal
178: this .dtype = XMLLiteralType.theXMLLiteralType;
179: value = s;
180: wellformed = true;
181: } else {
182: // Plain literal
183: this .value = s;
184: this .dtype = null;
185: }
186: }
187:
188: /**
189: * Internal function to set the object value from the lexical form.
190: * Requires datatype to be set.
191: * @throws DatatypeFormatException if the value is ill-formed and
192: * eager checking is on.
193: */
194: private void setValue(String lex) throws DatatypeFormatException {
195: try {
196: value = dtype.parse(lex);
197: wellformed = true;
198: } catch (DatatypeFormatException e) {
199: if (JenaParameters.enableEagerLiteralValidation) {
200: e.fillInStackTrace();
201: throw e;
202: } else {
203: wellformed = false;
204: }
205: }
206: }
207:
208: /**
209: * Normalize the literal. If the value is narrower than the current data type
210: * (e.g. value is xsd:date but the time is xsd:datetime) it will narrow
211: * the type. If the type is narrower than the value then it may normalize
212: * the value (e.g. set the mask of an XSDDateTime)
213: */
214: protected void normalize() {
215: if (dtype != null && value != null) {
216: dtype = dtype.normalizeSubType(value, dtype);
217: }
218: }
219:
220: //=======================================================================
221: // Methods
222:
223: /**
224: Answer true iff this is a well-formed XML literal.
225: */
226: public boolean isXML() {
227: return dtype == XMLLiteralType.theXMLLiteralType
228: && this .wellformed;
229: }
230:
231: /**
232: Answer truee iff this is a well-formed literal.
233: */
234: public boolean isWellFormed() {
235: return dtype != null && this .wellformed;
236: }
237:
238: /**
239: Answer a human-acceptable representation of this literal value.
240: This is NOT intended for a machine-processed result.
241: */
242: public String toString(boolean quoting) {
243: StringBuffer b = new StringBuffer();
244: if (quoting)
245: b.append('"');
246: b.append(getLexicalForm());
247: if (quoting)
248: b.append('"');
249: if (lang != null && !lang.equals(""))
250: b.append("@").append(lang);
251: if (dtype != null)
252: b.append("^^").append(dtype.getURI());
253: return b.toString();
254: }
255:
256: public String toString() {
257: return toString(false);
258: }
259:
260: /**
261: Answer the lexical form of this literal, constructing it on-the-fly
262: (and remembering it) if necessary.
263: */
264: public String getLexicalForm() {
265: if (lexicalForm == null)
266: lexicalForm = (dtype == null ? value.toString() : dtype
267: .unparse(value));
268: return lexicalForm;
269: }
270:
271: /**
272: Answer the value used to index this literal
273: TODO Consider pushing indexing decisions down to the datatype
274: */
275: public Object getIndexingValue() {
276: return isXML() ? this : !lang.equals("") ? getLexicalForm()
277: + "@" + lang.toLowerCase() : wellformed ? getValue()
278: : getLexicalForm();
279: }
280:
281: /**
282: Answer the language associated with this literal (the empty string if
283: there's no language).
284: */
285: public String language() {
286: return lang;
287: }
288:
289: /**
290: Answer a suitable instance of a Java class representing this literal's
291: value. May throw an exception if the literal is ill-formed.
292: */
293: public Object getValue() throws DatatypeFormatException {
294: if (wellformed) {
295: return value;
296: } else {
297: throw new DatatypeFormatException(lexicalForm, dtype,
298: " in getValue()");
299: }
300: }
301:
302: /**
303: Answer the datatype of this literal, null if it is untyped.
304: */
305: public RDFDatatype getDatatype() {
306: return dtype;
307: }
308:
309: /**
310: Answer the datatype URI of this literal, null if it untyped.
311: */
312: public String getDatatypeURI() {
313: if (dtype == null)
314: return null;
315: return dtype.getURI();
316: }
317:
318: /**
319: Answer true iff this literal is syntactically equal to <code>other</code>.
320: Note: this is <i>not</i> <code>sameValueAs</code>.
321: */
322: public boolean equals(Object other) {
323: if (other == null || !(other instanceof LiteralLabel)) {
324: return false;
325: }
326: LiteralLabel otherLiteral = (LiteralLabel) other;
327: boolean typeEqual = (dtype == null ? otherLiteral.dtype == null
328: : dtype.equals(otherLiteral.dtype));
329: boolean langEqual = (dtype == null ? lang
330: .equals(otherLiteral.lang) : true);
331: return typeEqual
332: && langEqual
333: && getLexicalForm().equals(
334: otherLiteral.getLexicalForm());
335: }
336:
337: /**
338: Answer true iff this literal represents the same (abstract) value as
339: the other one.
340: */
341: public boolean sameValueAs(LiteralLabel other) {
342: if (other == null)
343: return false;
344: if (!wellformed || !other.wellformed) {
345: if (!other.wellformed) {
346: // Need to support this comparison in order for the WG tests on ill formed
347: // literals to be testable using isIsomorphic to
348: return lexicalForm.equals(other.lexicalForm)
349: && lang.equalsIgnoreCase(other.lang);
350: } else {
351: return false;
352: }
353: }
354: if (dtype == null) {
355: // Plain literal
356: if (other.dtype == null
357: || (JenaParameters.enablePlainLiteralSameAsString && other.dtype
358: .equals(XSDDatatype.XSDstring))) {
359: return lexicalForm.equals(other.lexicalForm)
360: && lang.equalsIgnoreCase(other.lang);
361: } else {
362: return false;
363: }
364: } else {
365: // Typed literal
366: return dtype.isEqual(this , other);
367: }
368: }
369:
370: /**
371: Answer the hashcode of this literal, derived from its value if it's
372: well-formed and otherwise its lexical form.
373: */
374: public int hashCode() {
375: return dtype == null ? getDefaultHashcode() : dtype
376: .getHashCode(this );
377: }
378:
379: /**
380: Answer the default hash value, suitable for datatypes which have values
381: which support hashCode() naturally: it is derived from its value if it is
382: well-formed and otherwise from its lexical form.
383: */
384: public int getDefaultHashcode() {
385: return (wellformed ? value : getLexicalForm()).hashCode();
386: }
387:
388: }
389:
390: /*
391: (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
392: All rights reserved.
393:
394: Redistribution and use in source and binary forms, with or without
395: modification, are permitted provided that the following conditions
396: are met:
397:
398: 1. Redistributions of source code must retain the above copyright
399: notice, this list of conditions and the following disclaimer.
400:
401: 2. Redistributions in binary form must reproduce the above copyright
402: notice, this list of conditions and the following disclaimer in the
403: documentation and/or other materials provided with the distribution.
404:
405: 3. The name of the author may not be used to endorse or promote products
406: derived from this software without specific prior written permission.
407:
408: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
409: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
410: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
411: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
412: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
413: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
414: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
415: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
416: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
417: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
418: */
|