001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
 */
package org.apache.solr.schema;
017:
018: import org.apache.lucene.document.Field;
019: import org.apache.lucene.document.Fieldable;
020: import org.apache.lucene.analysis.Analyzer;
021: import org.apache.lucene.analysis.TokenStream;
022: import org.apache.lucene.analysis.Tokenizer;
023: import org.apache.lucene.analysis.Token;
024: import org.apache.lucene.search.SortField;
025: import org.apache.solr.search.function.ValueSource;
026: import org.apache.solr.search.function.OrdFieldSource;
027: import org.apache.solr.search.Sorting;
028: import org.apache.solr.request.XMLWriter;
029: import org.apache.solr.request.TextResponseWriter;
030: import org.apache.solr.analysis.SolrAnalyzer;
031: import org.apache.solr.core.SolrException;
032:
033: import java.util.logging.Logger;
034: import java.util.Map;
035: import java.util.HashMap;
036: import java.io.Reader;
037: import java.io.IOException;
038:
039: /**
040: * Base class for all field types used by an index schema.
041: *
042: * @author yonik
043: * @version $Id: FieldType.java 542679 2007-05-29 22:28:21Z ryan $
044: */
045: public abstract class FieldType extends FieldProperties {
046: public static final Logger log = Logger.getLogger(FieldType.class
047: .getName());
048:
049: /** The name of the type (not the name of the field) */
050: protected String typeName;
051: /** additional arguments specified in the field type declaration */
052: protected Map<String, String> args;
053: /** properties explicitly set to true */
054: protected int trueProperties;
055: /** properties explicitly set to false */
056: protected int falseProperties;
057: int properties;
058:
059: /** Returns true if fields of this type should be tokenized */
060: public boolean isTokenized() {
061: return (properties & TOKENIZED) != 0;
062: }
063:
064: /** subclasses should initialize themselves with the args provided
065: * and remove valid arguments. leftover arguments will cause an exception.
066: * Common boolean properties have already been handled.
067: *
068: */
069: protected void init(IndexSchema schema, Map<String, String> args) {
070: }
071:
072: // Handle additional arguments...
073: void setArgs(IndexSchema schema, Map<String, String> args) {
074: // default to STORED and INDEXED, and MULTIVALUED depending on schema version
075: properties = (STORED | INDEXED);
076: if (schema.getVersion() < 1.1f)
077: properties |= MULTIVALUED;
078:
079: this .args = args;
080: Map<String, String> initArgs = new HashMap<String, String>(args);
081:
082: trueProperties = FieldProperties
083: .parseProperties(initArgs, true);
084: falseProperties = FieldProperties.parseProperties(initArgs,
085: false);
086:
087: properties &= ~falseProperties;
088: properties |= trueProperties;
089:
090: for (String prop : FieldProperties.propertyNames)
091: initArgs.remove(prop);
092:
093: init(schema, initArgs);
094:
095: String positionInc = initArgs.get("positionIncrementGap");
096: if (positionInc != null) {
097: Analyzer analyzer = getAnalyzer();
098: if (analyzer instanceof SolrAnalyzer) {
099: ((SolrAnalyzer) analyzer)
100: .setPositionIncrementGap(Integer
101: .parseInt(positionInc));
102: } else {
103: throw new RuntimeException(
104: "Can't set positionIncrementGap on custom analyzer "
105: + analyzer.getClass());
106: }
107: analyzer = getQueryAnalyzer();
108: if (analyzer instanceof SolrAnalyzer) {
109: ((SolrAnalyzer) analyzer)
110: .setPositionIncrementGap(Integer
111: .parseInt(positionInc));
112: } else {
113: throw new RuntimeException(
114: "Can't set positionIncrementGap on custom analyzer "
115: + analyzer.getClass());
116: }
117: initArgs.remove("positionIncrementGap");
118: }
119:
120: if (initArgs.size() > 0) {
121: throw new RuntimeException("schema fieldtype " + typeName
122: + "(" + this .getClass().getName() + ")"
123: + " invalid arguments:" + initArgs);
124: }
125: }
126:
127: /** :TODO: document this method */
128: protected void restrictProps(int props) {
129: if ((properties & props) != 0) {
130: throw new RuntimeException("schema fieldtype " + typeName
131: + "(" + this .getClass().getName() + ")"
132: + " invalid properties:"
133: + propertiesToString(properties & props));
134: }
135: }
136:
137: /** The Name of this FieldType as specified in the schema file */
138: public String getTypeName() {
139: return typeName;
140: }
141:
142: void setTypeName(String typeName) {
143: this .typeName = typeName;
144: }
145:
146: public String toString() {
147: return typeName
148: + "{class="
149: + this .getClass().getName()
150: // + propertiesToString(properties)
151: + (analyzer != null ? ",analyzer="
152: + analyzer.getClass().getName() : "")
153: + ",args=" + args + "}";
154: }
155:
156: /**
157: * Used for adding a document when a field needs to be created from a
158: * type and a string.
159: *
160: * <p>
161: * By default, the indexed value is the same as the stored value
162: * (taken from toInternal()). Having a different representation for
163: * external, internal, and indexed would present quite a few problems
164: * given the current Lucene architecture. An analyzer for adding docs
165: * would need to translate internal->indexed while an analyzer for
166: * querying would need to translate external->indexed.
167: * </p>
168: * <p>
169: * The only other alternative to having internal==indexed would be to have
170: * internal==external. In this case, toInternal should convert to
171: * the indexed representation, toExternal() should do nothing, and
172: * createField() should *not* call toInternal, but use the external
173: * value and set tokenized=true to get Lucene to convert to the
174: * internal(indexed) form.
175: * </p>
176: *
177: * :TODO: clean up and clarify this explanation.
178: *
179: * @see #toInternal
180: */
181: public Field createField(SchemaField field, String externalVal,
182: float boost) {
183: String val;
184: try {
185: val = toInternal(externalVal);
186: } catch (NumberFormatException e) {
187: throw new SolrException(
188: SolrException.ErrorCode.SERVER_ERROR,
189: "Error while creating field '" + field
190: + "' from value '" + externalVal + "'", e,
191: false);
192: }
193: if (val == null)
194: return null;
195: if (!field.indexed() && !field.stored()) {
196: log.finest("Ignoring unindexed/unstored field: " + field);
197: return null;
198: }
199:
200: Field f = new Field(field.getName(), val, getFieldStore(field,
201: val), getFieldIndex(field, val), getFieldTermVec(field,
202: val));
203: f.setOmitNorms(field.omitNorms());
204: f.setBoost(boost);
205: return f;
206: }
207:
208: /* Helpers for field construction */
209: protected Field.TermVector getFieldTermVec(SchemaField field,
210: String internalVal) {
211: Field.TermVector ftv = Field.TermVector.NO;
212: if (field.storeTermPositions() && field.storeTermOffsets())
213: ftv = Field.TermVector.WITH_POSITIONS_OFFSETS;
214: else if (field.storeTermPositions())
215: ftv = Field.TermVector.WITH_POSITIONS;
216: else if (field.storeTermOffsets())
217: ftv = Field.TermVector.WITH_OFFSETS;
218: else if (field.storeTermVector())
219: ftv = Field.TermVector.YES;
220: return ftv;
221: }
222:
223: protected Field.Store getFieldStore(SchemaField field,
224: String internalVal) {
225: return field.stored() ? Field.Store.YES : Field.Store.NO;
226: }
227:
228: protected Field.Index getFieldIndex(SchemaField field,
229: String internalVal) {
230: return field.indexed() ? (isTokenized() ? Field.Index.TOKENIZED
231: : Field.Index.UN_TOKENIZED) : Field.Index.NO;
232: }
233:
234: /**
235: * Convert an external value (from XML update command or from query string)
236: * into the internal format.
237: * @see #toExternal
238: */
239: public String toInternal(String val) {
240: // - used in delete when a Term needs to be created.
241: // - used by the default getTokenizer() and createField()
242: return val;
243: }
244:
245: /**
246: * Convert the stored-field format to an external (string, human readable)
247: * value
248: * @see #toInternal
249: */
250: public String toExternal(Fieldable f) {
251: // currently used in writing XML of the search result (but perhaps
252: // a more efficient toXML(Fieldable f, Writer w) should be used
253: // in the future.
254: return f.stringValue();
255: }
256:
257: /** :TODO: document this method */
258: public String indexedToReadable(String indexedForm) {
259: return indexedForm;
260: }
261:
262: /** :TODO: document this method */
263: public String storedToReadable(Fieldable f) {
264: return toExternal(f);
265: }
266:
267: /** :TODO: document this method */
268: public String storedToIndexed(Fieldable f) {
269: // right now, the transformation of single valued fields like SortableInt
270: // is done when the Field is created, not at analysis time... this means
271: // that the indexed form is the same as the stored field form.
272: return f.stringValue();
273: }
274:
275: /*********
276: // default analyzer for non-text fields.
277: // Only reads 80 bytes, but that should be plenty for a single value.
278: public Analyzer getAnalyzer() {
279: if (analyzer != null) return analyzer;
280:
281: // the default analyzer...
282: return new Analyzer() {
283: public TokenStream tokenStream(String fieldName, Reader reader) {
284: return new Tokenizer(reader) {
285: final char[] cbuf = new char[80];
286: public Token next() throws IOException {
287: int n = input.read(cbuf,0,80);
288: if (n<=0) return null;
289: String s = toInternal(new String(cbuf,0,n));
290: return new Token(s,0,n);
291: };
292: };
293: }
294: };
295: }
296: **********/
297:
298: /**
299: * Default analyzer for types that only produce 1 verbatim token...
300: * A maximum size of chars to be read must be specified
301: */
302: protected final class DefaultAnalyzer extends SolrAnalyzer {
303: final int maxChars;
304:
305: DefaultAnalyzer(int maxChars) {
306: this .maxChars = maxChars;
307: }
308:
309: public TokenStream tokenStream(String fieldName, Reader reader) {
310: return new Tokenizer(reader) {
311: char[] cbuf = new char[maxChars];
312:
313: public Token next() throws IOException {
314: int n = input.read(cbuf, 0, maxChars);
315: if (n <= 0)
316: return null;
317: String s = toInternal(new String(cbuf, 0, n)); // virtual func on parent
318: return new Token(s, 0, n);
319: };
320: };
321: }
322: }
323:
324: /**
325: * Analyzer set by schema for text types to use when indexing fields
326: * of this type, subclasses can set analyzer themselves or override
327: * getAnalyzer()
328: * @see #getAnalyzer
329: */
330: protected Analyzer analyzer = new DefaultAnalyzer(256);
331:
332: /**
333: * Analyzer set by schema for text types to use when searching fields
334: * of this type, subclasses can set analyzer themselves or override
335: * getAnalyzer()
336: * @see #getQueryAnalyzer
337: */
338: protected Analyzer queryAnalyzer = analyzer;
339:
340: /**
341: * Returns the Analyzer to be used when indexing fields of this type.
342: * <p>
343: * This method may be called many times, at any time.
344: * </p>
345: * @see #getQueryAnalyzer
346: */
347: public Analyzer getAnalyzer() {
348: return analyzer;
349: }
350:
351: /**
352: * Returns the Analyzer to be used when searching fields of this type.
353: * <p>
354: * This method may be called many times, at any time.
355: * </p>
356: * @see #getAnalyzer
357: */
358: public Analyzer getQueryAnalyzer() {
359: return queryAnalyzer;
360: }
361:
362: /**
363: * Sets the Analyzer to be used when indexing fields of this type.
364: * @see #getAnalyzer
365: */
366: public void setAnalyzer(Analyzer analyzer) {
367: this .analyzer = analyzer;
368: log.finest("FieldType: " + typeName + ".setAnalyzer("
369: + analyzer.getClass().getName() + ")");
370: }
371:
372: /**
373: * Sets the Analyzer to be used when querying fields of this type.
374: * @see #getQueryAnalyzer
375: */
376: public void setQueryAnalyzer(Analyzer analyzer) {
377: this .queryAnalyzer = analyzer;
378: log.finest("FieldType: " + typeName + ".setQueryAnalyzer("
379: + analyzer.getClass().getName() + ")");
380: }
381:
382: /**
383: * Renders the specified field as XML
384: */
385: public abstract void write(XMLWriter xmlWriter, String name,
386: Fieldable f) throws IOException;
387:
388: /**
389: * calls back to TextResponseWriter to write the field value
390: */
391: public abstract void write(TextResponseWriter writer, String name,
392: Fieldable f) throws IOException;
393:
394: /**
395: * Returns the SortField instance that should be used to sort fields
396: * of this type.
397: */
398: public abstract SortField getSortField(SchemaField field,
399: boolean top);
400:
401: /**
402: * Utility usable by subclasses when they want to get basic String sorting.
403: */
404: protected SortField getStringSort(SchemaField field, boolean reverse) {
405: return Sorting.getStringSortField(field.name, reverse, field
406: .sortMissingLast(), field.sortMissingFirst());
407: }
408:
409: /** called to get the default value source (normally, from the
410: * Lucene FieldCache.)
411: */
412: public ValueSource getValueSource(SchemaField field) {
413: return new OrdFieldSource(field.name);
414: }
415: }
|