0001: /**
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */package org.apache.solr.schema;
0017:
0018: import org.apache.lucene.analysis.Analyzer;
0019: import org.apache.lucene.analysis.TokenStream;
0020: import org.apache.lucene.document.Fieldable;
0021: import org.apache.lucene.search.DefaultSimilarity;
0022: import org.apache.lucene.search.Similarity;
0023: import org.apache.lucene.queryParser.QueryParser;
0024: import org.apache.solr.core.SolrConfig;
0025: import org.apache.solr.core.SolrException;
0026: import org.apache.solr.core.Config;
0027: import org.apache.solr.analysis.TokenFilterFactory;
0028: import org.apache.solr.analysis.TokenizerChain;
0029: import org.apache.solr.analysis.TokenizerFactory;
0030: import org.apache.solr.search.SolrQueryParser;
0031: import org.apache.solr.util.DOMUtil;
0032: import org.w3c.dom.Document;
0033: import org.w3c.dom.NamedNodeMap;
0034: import org.w3c.dom.Node;
0035: import org.w3c.dom.NodeList;
0036:
0037: import javax.xml.xpath.XPath;
0038: import javax.xml.xpath.XPathConstants;
0039: import javax.xml.xpath.XPathExpressionException;
0040: import javax.xml.xpath.XPathFactory;
0041: import java.io.InputStream;
0042: import java.io.Reader;
0043: import java.util.*;
0044: import java.util.logging.Logger;
0045:
0046: /**
0047: * <code>IndexSchema</code> contains information about the valid fields in an index
0048: * and the types of those fields.
0049: *
0050: * @author yonik
0051: * @version $Id: IndexSchema.java 542679 2007-05-29 22:28:21Z ryan $
0052: */
0053:
0054: public final class IndexSchema {
/** Package-visible logger named after this class. */
static final Logger log = Logger.getLogger(IndexSchema.class.getName());

/** Resource name of the schema file, exactly as handed to the constructor. */
private final String schemaFile;

/** Value of the schema's name attribute; stays null when the schema declares none. */
private String name;

/** Value of the schema's version attribute; 1.0 when the schema declares none. */
private float version;

/**
 * Creates a schema from the named resource and parses it right away.
 *
 * <p>The resource is opened through {@link Config#openResource}, so the normal
 * Config path directory searching rules apply.</p>
 *
 * @param schemaFile name of the schema resource to load
 * @throws SolrException if the schema cannot be parsed
 * @see Config#openResource
 */
public IndexSchema(String schemaFile) {
  this.schemaFile = schemaFile;
  readConfig();
}
0072:
0073: /**
0074: * Direct acess to the InputStream for the schemaFile used by this instance.
0075: *
0076: * @see Config#openResource
0077: */
0078: public InputStream getInputStream() {
0079: return Config.openResource(schemaFile);
0080: }
0081:
0082: float getVersion() {
0083: return version;
0084: }
0085:
0086: /** The Name of this schema (as specified in the schema file) */
0087: public String getName() {
0088: return name;
0089: }
0090:
0091: private final HashMap<String, SchemaField> fields = new HashMap<String, SchemaField>();
0092: private final HashMap<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
0093: private final List<SchemaField> fieldsWithDefaultValue = new ArrayList<SchemaField>();
0094: private final Collection<SchemaField> requiredFields = new HashSet<SchemaField>();
0095:
0096: /**
0097: * Provides direct access to the Map containing all explicit
0098: * (ie: non-dynamic) fields in the index, keyed on field name.
0099: *
0100: * <p>
0101: * Modifying this Map (or any item in it) will affect the real schema
0102: * </p>
0103: */
0104: public Map<String, SchemaField> getFields() {
0105: return fields;
0106: }
0107:
0108: /**
0109: * Provides direct access to the Map containing all Field Types
0110: * in the index, keyed on fild type name.
0111: *
0112: * <p>
0113: * Modifying this Map (or any item in it) will affect the real schema
0114: * </p>
0115: */
0116: public Map<String, FieldType> getFieldTypes() {
0117: return fieldTypes;
0118: }
0119:
0120: /**
0121: * Provides direct access to the List containing all fields with a default value
0122: */
0123: public List<SchemaField> getFieldsWithDefaultValue() {
0124: return fieldsWithDefaultValue;
0125: }
0126:
0127: /**
0128: * Provides direct access to the List containing all required fields. This
0129: * list contains all fields with default values.
0130: */
0131: public Collection<SchemaField> getRequiredFields() {
0132: return requiredFields;
0133: }
0134:
0135: private Similarity similarity;
0136:
0137: /**
0138: * Returns the Similarity used for this index
0139: */
0140: public Similarity getSimilarity() {
0141: return similarity;
0142: }
0143:
0144: private Analyzer analyzer;
0145:
0146: /**
0147: * Returns the Analyzer used when indexing documents for this index
0148: *
0149: * <p>
0150: * This Analyzer is field (and dynamic field) name aware, and delegates to
0151: * a field specific Analyzer based on the field type.
0152: * </p>
0153: */
0154: public Analyzer getAnalyzer() {
0155: return analyzer;
0156: }
0157:
0158: private Analyzer queryAnalyzer;
0159:
0160: /**
0161: * Returns the Analyzer used when searching this index
0162: *
0163: * <p>
0164: * This Analyzer is field (and dynamic field) name aware, and delegates to
0165: * a field specific Analyzer based on the field type.
0166: * </p>
0167: */
0168: public Analyzer getQueryAnalyzer() {
0169: return queryAnalyzer;
0170: }
0171:
0172: private String defaultSearchFieldName = null;
0173: private String queryParserDefaultOperator = "OR";
0174:
0175: /**
0176: * A SolrQueryParser linked to this IndexSchema for field datatype
0177: * information, and populated with default options from the
0178: * <solrQueryParser> configuration for this IndexSchema.
0179: *
0180: * @param defaultField if non-null overrides the schema default
0181: */
0182: public SolrQueryParser getSolrQueryParser(String defaultField) {
0183: SolrQueryParser qp = new SolrQueryParser(this , defaultField);
0184: String operator = getQueryParserDefaultOperator();
0185: qp
0186: .setDefaultOperator("AND".equals(operator) ? QueryParser.Operator.AND
0187: : QueryParser.Operator.OR);
0188: return qp;
0189: }
0190:
0191: /**
0192: * Name of the default search field specified in the schema file
0193: * @deprecated use getSolrQueryParser().getField()
0194: */
0195: public String getDefaultSearchFieldName() {
0196: return defaultSearchFieldName;
0197: }
0198:
0199: /**
0200: * default operator ("AND" or "OR") for QueryParser
0201: * @deprecated use getSolrQueryParser().getDefaultOperator()
0202: */
0203: public String getQueryParserDefaultOperator() {
0204: return queryParserDefaultOperator;
0205: }
0206:
0207: private SchemaField uniqueKeyField;
0208:
0209: /**
0210: * Unique Key field specified in the schema file
0211: * @return null if this schema has no unique key field
0212: */
0213: public SchemaField getUniqueKeyField() {
0214: return uniqueKeyField;
0215: }
0216:
0217: private String uniqueKeyFieldName;
0218: private FieldType uniqueKeyFieldType;
0219:
0220: /**
0221: * The raw (field type encoded) value of the Unique Key field for
0222: * the specified Document
0223: * @return null if this schema has no unique key field
0224: * @see #printableUniqueKey
0225: */
0226: public Fieldable getUniqueKeyField(
0227: org.apache.lucene.document.Document doc) {
0228: return doc.getFieldable(uniqueKeyFieldName); // this should return null if name is null
0229: }
0230:
0231: /**
0232: * The printable value of the Unique Key field for
0233: * the specified Document
0234: * @return null if this schema has no unique key field
0235: */
0236: public String printableUniqueKey(
0237: org.apache.lucene.document.Document doc) {
0238: Fieldable f = doc.getFieldable(uniqueKeyFieldName);
0239: return f == null ? null : uniqueKeyFieldType.toExternal(f);
0240: }
0241:
0242: private SchemaField getIndexedField(String fname) {
0243: SchemaField f = getFields().get(fname);
0244: if (f == null) {
0245: throw new RuntimeException("unknown field '" + fname + "'");
0246: }
0247: if (!f.indexed()) {
0248: throw new RuntimeException("'" + fname
0249: + "' is not an indexed field:" + f);
0250: }
0251: return f;
0252: }
0253:
0254: private class SolrIndexAnalyzer extends Analyzer {
0255: protected final HashMap<String, Analyzer> analyzers;
0256:
0257: SolrIndexAnalyzer() {
0258: analyzers = analyzerCache();
0259: }
0260:
0261: protected HashMap<String, Analyzer> analyzerCache() {
0262: HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
0263: for (SchemaField f : getFields().values()) {
0264: Analyzer analyzer = f.getType().getAnalyzer();
0265: cache.put(f.getName(), analyzer);
0266: }
0267: return cache;
0268: }
0269:
0270: protected Analyzer getAnalyzer(String fieldName) {
0271: Analyzer analyzer = analyzers.get(fieldName);
0272: return analyzer != null ? analyzer : getDynamicFieldType(
0273: fieldName).getAnalyzer();
0274: }
0275:
0276: public TokenStream tokenStream(String fieldName, Reader reader) {
0277: return getAnalyzer(fieldName)
0278: .tokenStream(fieldName, reader);
0279: }
0280:
0281: public int getPositionIncrementGap(String fieldName) {
0282: return getAnalyzer(fieldName).getPositionIncrementGap(
0283: fieldName);
0284: }
0285: }
0286:
0287: private class SolrQueryAnalyzer extends SolrIndexAnalyzer {
0288: protected HashMap<String, Analyzer> analyzerCache() {
0289: HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
0290: for (SchemaField f : getFields().values()) {
0291: Analyzer analyzer = f.getType().getQueryAnalyzer();
0292: cache.put(f.getName(), analyzer);
0293: }
0294: return cache;
0295: }
0296:
0297: protected Analyzer getAnalyzer(String fieldName) {
0298: Analyzer analyzer = analyzers.get(fieldName);
0299: return analyzer != null ? analyzer : getDynamicFieldType(
0300: fieldName).getQueryAnalyzer();
0301: }
0302: }
0303:
0304: private void readConfig() {
0305: log.info("Reading Solr Schema");
0306:
0307: try {
0308: /***
0309: DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
0310: Document document = builder.parse(getInputStream());
0311: ***/
0312:
0313: Config config = new Config("schema", getInputStream(),
0314: "/schema/");
0315: Document document = config.getDocument();
0316: XPath xpath = config.getXPath();
0317:
0318: Node nd = (Node) xpath.evaluate("/schema/@name", document,
0319: XPathConstants.NODE);
0320: if (nd == null) {
0321: log.warning("schema has no name!");
0322: } else {
0323: name = nd.getNodeValue();
0324: log.info("Schema name=" + name);
0325: }
0326:
0327: version = config.getFloat("/schema/@version", 1.0f);
0328:
0329: String expression = "/schema/types/fieldtype | /schema/types/fieldType";
0330: NodeList nodes = (NodeList) xpath.evaluate(expression,
0331: document, XPathConstants.NODESET);
0332:
0333: for (int i = 0; i < nodes.getLength(); i++) {
0334: Node node = nodes.item(i);
0335: NamedNodeMap attrs = node.getAttributes();
0336:
0337: String name = DOMUtil.getAttr(attrs, "name",
0338: "fieldtype error");
0339: log.finest("reading fieldtype " + name);
0340: String clsName = DOMUtil.getAttr(attrs, "class",
0341: "fieldtype error");
0342: FieldType ft = (FieldType) Config.newInstance(clsName);
0343: ft.setTypeName(name);
0344:
0345: expression = "./analyzer[@type='query']";
0346: Node anode = (Node) xpath.evaluate(expression, node,
0347: XPathConstants.NODE);
0348: Analyzer queryAnalyzer = readAnalyzer(anode);
0349:
0350: // An analyzer without a type specified, or with type="index"
0351: expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
0352: anode = (Node) xpath.evaluate(expression, node,
0353: XPathConstants.NODE);
0354: Analyzer analyzer = readAnalyzer(anode);
0355:
0356: if (queryAnalyzer == null)
0357: queryAnalyzer = analyzer;
0358: if (analyzer == null)
0359: analyzer = queryAnalyzer;
0360: if (analyzer != null) {
0361: ft.setAnalyzer(analyzer);
0362: ft.setQueryAnalyzer(queryAnalyzer);
0363: }
0364:
0365: ft.setArgs(this , DOMUtil.toMapExcept(attrs, "name",
0366: "class"));
0367: FieldType old = fieldTypes.put(ft.typeName, ft);
0368: if (old != null) {
0369: String msg = "[schema.xml] Duplicate fieldType definition for '"
0370: + ft.typeName
0371: + "' ignoring: "
0372: + old.toString();
0373:
0374: Throwable t = new SolrException(
0375: SolrException.ErrorCode.SERVER_ERROR, msg);
0376: SolrException.logOnce(log, null, t);
0377: SolrConfig.severeErrors.add(t);
0378: }
0379: log.finest("fieldtype defined: " + ft);
0380: }
0381:
0382: // Hang on to the fields that say if they are required -- this lets us set a reasonable default for the unique key
0383: Map<String, Boolean> explicitRequiredProp = new HashMap<String, Boolean>();
0384: ArrayList<DynamicField> dFields = new ArrayList<DynamicField>();
0385: expression = "/schema/fields/field | /schema/fields/dynamicField";
0386: nodes = (NodeList) xpath.evaluate(expression, document,
0387: XPathConstants.NODESET);
0388:
0389: for (int i = 0; i < nodes.getLength(); i++) {
0390: Node node = nodes.item(i);
0391:
0392: NamedNodeMap attrs = node.getAttributes();
0393:
0394: String name = DOMUtil.getAttr(attrs, "name",
0395: "field definition");
0396: log.finest("reading field def " + name);
0397: String type = DOMUtil.getAttr(attrs, "type", "field "
0398: + name);
0399: String val;
0400:
0401: FieldType ft = fieldTypes.get(type);
0402: if (ft == null) {
0403: throw new SolrException(
0404: SolrException.ErrorCode.BAD_REQUEST,
0405: "Unknown fieldtype '" + type + "'", false);
0406: }
0407:
0408: Map<String, String> args = DOMUtil.toMapExcept(attrs,
0409: "name", "type");
0410: if (args.get("required") != null) {
0411: explicitRequiredProp.put(name, Boolean.valueOf(args
0412: .get("required")));
0413: }
0414:
0415: SchemaField f = SchemaField.create(name, ft, args);
0416:
0417: if (node.getNodeName().equals("field")) {
0418: SchemaField old = fields.put(f.getName(), f);
0419: if (old != null) {
0420: String msg = "[schema.xml] Duplicate field definition for '"
0421: + f.getName()
0422: + "' ignoring: "
0423: + old.toString();
0424:
0425: Throwable t = new SolrException(
0426: SolrException.ErrorCode.SERVER_ERROR,
0427: msg);
0428: SolrException.logOnce(log, null, t);
0429: SolrConfig.severeErrors.add(t);
0430: }
0431:
0432: log.fine("field defined: " + f);
0433: if (f.getDefaultValue() != null) {
0434: log.fine(name + " contains default value: "
0435: + f.getDefaultValue());
0436: fieldsWithDefaultValue.add(f);
0437: }
0438: if (f.isRequired()) {
0439: log.fine(name + " is required in this schema");
0440: requiredFields.add(f);
0441: }
0442: } else if (node.getNodeName().equals("dynamicField")) {
0443: // make sure nothing else has the same path
0444: boolean dup = false;
0445: for (DynamicField df : dFields) {
0446: if (df.regex.equals(f.name)) {
0447: String msg = "[schema.xml] Duplicate DynamicField definition for '"
0448: + f.getName()
0449: + "' ignoring: "
0450: + f.toString();
0451:
0452: Throwable t = new SolrException(
0453: SolrException.ErrorCode.SERVER_ERROR,
0454: msg);
0455: SolrException.logOnce(log, null, t);
0456: SolrConfig.severeErrors.add(t);
0457: dup = true;
0458: break;
0459: }
0460: }
0461: if (!dup) {
0462: dFields.add(new DynamicField(f));
0463: log.fine("dynamic field defined: " + f);
0464: }
0465: } else {
0466: // we should never get here
0467: throw new RuntimeException("Unknown field type");
0468: }
0469: }
0470:
0471: //fields with default values are by definition required
0472: //add them to required fields, and we only have to loop once
0473: // in DocumentBuilder.getDoc()
0474: requiredFields.addAll(getFieldsWithDefaultValue());
0475:
0476: // OK, now sort the dynamic fields largest to smallest size so we don't get
0477: // any false matches. We want to act like a compiler tool and try and match
0478: // the largest string possible.
0479: Collections.sort(dFields);
0480:
0481: log.finest("Dynamic Field Ordering:" + dFields);
0482:
0483: // stuff it in a normal array for faster access
0484: dynamicFields = (DynamicField[]) dFields
0485: .toArray(new DynamicField[dFields.size()]);
0486:
0487: Node node = (Node) xpath.evaluate(
0488: "/schema/similarity/@class", document,
0489: XPathConstants.NODE);
0490: if (node == null) {
0491: similarity = new DefaultSimilarity();
0492: log.fine("using default similarity");
0493: } else {
0494: similarity = (Similarity) Config.newInstance(node
0495: .getNodeValue().trim());
0496: log.fine("using similarity "
0497: + similarity.getClass().getName());
0498: }
0499:
0500: node = (Node) xpath.evaluate(
0501: "/schema/defaultSearchField/text()", document,
0502: XPathConstants.NODE);
0503: if (node == null) {
0504: log
0505: .warning("no default search field specified in schema.");
0506: } else {
0507: defaultSearchFieldName = node.getNodeValue().trim();
0508: // throw exception if specified, but not found or not indexed
0509: if (defaultSearchFieldName != null)
0510: getIndexedField(defaultSearchFieldName);
0511: log.info("default search field is "
0512: + defaultSearchFieldName);
0513: }
0514:
0515: node = (Node) xpath.evaluate(
0516: "/schema/solrQueryParser/@defaultOperator",
0517: document, XPathConstants.NODE);
0518: if (node == null) {
0519: log.fine("using default query parser operator (OR)");
0520: } else {
0521: queryParserDefaultOperator = node.getNodeValue().trim();
0522: log.info("query parser default operator is "
0523: + queryParserDefaultOperator);
0524: }
0525:
0526: node = (Node) xpath.evaluate("/schema/uniqueKey/text()",
0527: document, XPathConstants.NODE);
0528: if (node == null) {
0529: log.warning("no uniqueKey specified in schema.");
0530: } else {
0531: uniqueKeyField = getIndexedField(node.getNodeValue()
0532: .trim());
0533: uniqueKeyFieldName = uniqueKeyField.getName();
0534: uniqueKeyFieldType = uniqueKeyField.getType();
0535: log.info("unique key field: " + uniqueKeyFieldName);
0536:
0537: // Unless the uniqueKeyField is marked 'required=false' then make sure it exists
0538: if (Boolean.FALSE != explicitRequiredProp
0539: .get(uniqueKeyFieldName)) {
0540: uniqueKeyField.required = true;
0541: requiredFields.add(uniqueKeyField);
0542: }
0543: }
0544:
0545: /////////////// parse out copyField commands ///////////////
0546: // Map<String,ArrayList<SchemaField>> cfields = new HashMap<String,ArrayList<SchemaField>>();
0547: // expression = "/schema/copyField";
0548:
0549: ArrayList<DynamicCopy> dCopies = new ArrayList<DynamicCopy>();
0550:
0551: expression = "//copyField";
0552: nodes = (NodeList) xpath.evaluate(expression, document,
0553: XPathConstants.NODESET);
0554:
0555: for (int i = 0; i < nodes.getLength(); i++) {
0556: node = nodes.item(i);
0557: NamedNodeMap attrs = node.getAttributes();
0558:
0559: String source = DOMUtil.getAttr(attrs, "source",
0560: "copyField definition");
0561: String dest = DOMUtil.getAttr(attrs, "dest",
0562: "copyField definition");
0563:
0564: boolean sourceIsPattern = isWildCard(source);
0565: boolean destIsPattern = isWildCard(dest);
0566:
0567: log.fine("copyField source='" + source + "' dest='"
0568: + dest + "'");
0569: SchemaField d = getField(dest);
0570:
0571: if (sourceIsPattern) {
0572: if (destIsPattern) {
0573: DynamicField df = null;
0574: for (DynamicField dd : dynamicFields) {
0575: if (dd.regex.equals(dest)) {
0576: df = dd;
0577: break;
0578: }
0579: }
0580: if (df == null) {
0581: throw new SolrException(
0582: SolrException.ErrorCode.SERVER_ERROR,
0583: "copyField dynamic destination must match a dynamicField.");
0584: }
0585: dCopies.add(new DynamicDestCopy(source, df));
0586: } else {
0587: dCopies.add(new DynamicCopy(source, d));
0588: }
0589: } else if (destIsPattern) {
0590: String msg = "copyField only supports a dynamic destination if the source is also dynamic";
0591: throw new SolrException(
0592: SolrException.ErrorCode.SERVER_ERROR, msg);
0593: } else {
0594: // retrieve the field to force an exception if it doesn't exist
0595: SchemaField f = getField(source);
0596:
0597: SchemaField[] destArr = copyFields.get(source);
0598: if (destArr == null) {
0599: destArr = new SchemaField[] { d };
0600: } else {
0601: destArr = (SchemaField[]) append(destArr, d);
0602: }
0603: copyFields.put(source, destArr);
0604: }
0605: }
0606:
0607: log.finest("Dynamic Copied Fields:" + dCopies);
0608:
0609: // stuff it in a normal array for faster access
0610: dynamicCopyFields = (DynamicCopy[]) dCopies
0611: .toArray(new DynamicCopy[dCopies.size()]);
0612:
0613: } catch (SolrException e) {
0614: SolrConfig.severeErrors.add(e);
0615: throw e;
0616: } catch (Exception e) {
0617: // unexpected exception...
0618: SolrConfig.severeErrors.add(e);
0619: throw new SolrException(
0620: SolrException.ErrorCode.SERVER_ERROR,
0621: "Schema Parsing Failed", e, false);
0622: }
0623:
0624: analyzer = new SolrIndexAnalyzer();
0625: queryAnalyzer = new SolrQueryAnalyzer();
0626: }
0627:
0628: private static Object[] append(Object[] orig, Object item) {
0629: Object[] newArr = (Object[]) java.lang.reflect.Array
0630: .newInstance(orig.getClass().getComponentType(),
0631: orig.length + 1);
0632: System.arraycopy(orig, 0, newArr, 0, orig.length);
0633: newArr[orig.length] = item;
0634: return newArr;
0635: }
0636:
0637: //
0638: // <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
0639: //
0640: //
0641: private Analyzer readAnalyzer(Node node)
0642: throws XPathExpressionException {
0643: // parent node used to be passed in as "fieldtype"
0644: // if (!fieldtype.hasChildNodes()) return null;
0645: // Node node = DOMUtil.getChild(fieldtype,"analyzer");
0646:
0647: if (node == null)
0648: return null;
0649: NamedNodeMap attrs = node.getAttributes();
0650: String analyzerName = DOMUtil.getAttr(attrs, "class");
0651: if (analyzerName != null) {
0652: return (Analyzer) Config.newInstance(analyzerName);
0653: }
0654:
0655: XPath xpath = XPathFactory.newInstance().newXPath();
0656: Node tokNode = (Node) xpath.evaluate("./tokenizer", node,
0657: XPathConstants.NODE);
0658: NodeList nList = (NodeList) xpath.evaluate("./filter", node,
0659: XPathConstants.NODESET);
0660:
0661: if (tokNode == null) {
0662: throw new SolrException(
0663: SolrException.ErrorCode.SERVER_ERROR,
0664: "analyzer without class or tokenizer & filter list");
0665: }
0666: TokenizerFactory tfac = readTokenizerFactory(tokNode);
0667:
0668: /******
0669: // oops, getChildNodes() includes text (newlines, etc) in addition
0670: // to the actual child elements
0671: NodeList nList = node.getChildNodes();
0672: TokenizerFactory tfac = readTokenizerFactory(nList.item(0));
0673: if (tfac==null) {
0674: throw new SolrException( SolrException.StatusCode.SERVER_ERROR,"TokenizerFactory must be specified first in analyzer");
0675: }
0676: ******/
0677:
0678: ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>();
0679: for (int i = 0; i < nList.getLength(); i++) {
0680: TokenFilterFactory filt = readTokenFilterFactory(nList
0681: .item(i));
0682: if (filt != null)
0683: filters.add(filt);
0684: }
0685:
0686: return new TokenizerChain(tfac, filters
0687: .toArray(new TokenFilterFactory[filters.size()]));
0688: };
0689:
0690: // <tokenizer class="solr.StandardFilterFactory"/>
0691: private TokenizerFactory readTokenizerFactory(Node node) {
0692: // if (node.getNodeName() != "tokenizer") return null;
0693: NamedNodeMap attrs = node.getAttributes();
0694: String className = DOMUtil.getAttr(attrs, "class", "tokenizer");
0695: TokenizerFactory tfac = (TokenizerFactory) Config
0696: .newInstance(className);
0697: tfac.init(DOMUtil.toMapExcept(attrs, "class"));
0698: return tfac;
0699: }
0700:
0701: // <tokenizer class="solr.StandardFilterFactory"/>
0702: private TokenFilterFactory readTokenFilterFactory(Node node) {
0703: // if (node.getNodeName() != "filter") return null;
0704: NamedNodeMap attrs = node.getAttributes();
0705: String className = DOMUtil.getAttr(attrs, "class",
0706: "token filter");
0707: TokenFilterFactory tfac = (TokenFilterFactory) Config
0708: .newInstance(className);
0709: tfac.init(DOMUtil.toMapExcept(attrs, "class"));
0710: return tfac;
0711: }
0712:
0713: static abstract class DynamicReplacement implements
0714: Comparable<DynamicReplacement> {
0715: final static int STARTS_WITH = 1;
0716: final static int ENDS_WITH = 2;
0717:
0718: final String regex;
0719: final int type;
0720:
0721: final String str;
0722:
0723: protected DynamicReplacement(String regex) {
0724: this .regex = regex;
0725: if (regex.startsWith("*")) {
0726: type = ENDS_WITH;
0727: str = regex.substring(1);
0728: } else if (regex.endsWith("*")) {
0729: type = STARTS_WITH;
0730: str = regex.substring(0, regex.length() - 1);
0731: } else {
0732: throw new RuntimeException(
0733: "dynamic field name must start or end with *");
0734: }
0735: }
0736:
0737: public boolean matches(String name) {
0738: if (type == STARTS_WITH && name.startsWith(str))
0739: return true;
0740: else if (type == ENDS_WITH && name.endsWith(str))
0741: return true;
0742: else
0743: return false;
0744: }
0745:
0746: /**
0747: * Sort order is based on length of regex. Longest comes first.
0748: * @param other The object to compare to.
0749: * @return a negative integer, zero, or a positive integer
0750: * as this object is less than, equal to, or greater than
0751: * the specified object.
0752: */
0753: public int compareTo(DynamicReplacement other) {
0754: return other.regex.length() - regex.length();
0755: }
0756: }
0757:
0758: //
0759: // Instead of storing a type, this could be implemented as a hierarchy
0760: // with a virtual matches().
0761: // Given how often a search will be done, however, speed is the overriding
0762: // concern and I'm not sure which is faster.
0763: //
0764: final static class DynamicField extends DynamicReplacement {
0765: final SchemaField prototype;
0766:
0767: DynamicField(SchemaField prototype) {
0768: super (prototype.name);
0769: this .prototype = prototype;
0770: }
0771:
0772: SchemaField makeSchemaField(String name) {
0773: // could have a cache instead of returning a new one each time, but it might
0774: // not be worth it.
0775: // Actually, a higher level cache could be worth it to avoid too many
0776: // .startsWith() and .endsWith() comparisons. it depends on how many
0777: // dynamic fields there are.
0778: return new SchemaField(prototype, name);
0779: }
0780:
0781: public String toString() {
0782: return prototype.toString();
0783: }
0784: }
0785:
0786: static class DynamicCopy extends DynamicReplacement {
0787: final SchemaField targetField;
0788:
0789: DynamicCopy(String regex, SchemaField targetField) {
0790: super (regex);
0791: this .targetField = targetField;
0792: }
0793:
0794: public SchemaField getTargetField(String sourceField) {
0795: return targetField;
0796: }
0797:
0798: @Override
0799: public String toString() {
0800: return targetField.toString();
0801: }
0802: }
0803:
0804: static class DynamicDestCopy extends DynamicCopy {
0805: final DynamicField dynamic;
0806:
0807: final int dtype;
0808: final String dstr;
0809:
0810: DynamicDestCopy(String source, DynamicField dynamic) {
0811: super (source, dynamic.prototype);
0812: this .dynamic = dynamic;
0813:
0814: String dest = dynamic.regex;
0815: if (dest.startsWith("*")) {
0816: dtype = ENDS_WITH;
0817: dstr = dest.substring(1);
0818: } else if (dest.endsWith("*")) {
0819: dtype = STARTS_WITH;
0820: dstr = dest.substring(0, dest.length() - 1);
0821: } else {
0822: throw new RuntimeException(
0823: "dynamic copyField destination name must start or end with *");
0824: }
0825: }
0826:
0827: @Override
0828: public SchemaField getTargetField(String sourceField) {
0829: String dyn = (type == STARTS_WITH) ? sourceField
0830: .substring(str.length()) : sourceField.substring(0,
0831: sourceField.length() - str.length());
0832:
0833: String name = (dtype == STARTS_WITH) ? (dstr + dyn)
0834: : (dyn + dstr);
0835: return dynamic.makeSchemaField(name);
0836: }
0837:
0838: @Override
0839: public String toString() {
0840: return targetField.toString();
0841: }
0842: }
0843:
0844: private DynamicField[] dynamicFields;
0845:
0846: /**
0847: * Does the schema have the specified field defined explicitly, i.e.
0848: * not as a result of a copyField declaration with a wildcard? We
0849: * consider it explicitly defined if it matches a field or dynamicField
0850: * declaration.
0851: * @param fieldName
0852: * @return true if explicitly declared in the schema.
0853: */
0854: public boolean hasExplicitField(String fieldName) {
0855: if (fields.containsKey(fieldName)) {
0856: return true;
0857: }
0858:
0859: for (DynamicField df : dynamicFields) {
0860: if (df.matches(fieldName))
0861: return true;
0862: }
0863:
0864: return false;
0865: }
0866:
0867: /**
0868: * Returns the SchemaField that should be used for the specified field name, or
0869: * null if none exists.
0870: *
0871: * @param fieldName may be an explicitly defined field, or a name that
0872: * matches a dynamic field.
0873: * @see #getFieldType
0874: */
0875: public SchemaField getFieldOrNull(String fieldName) {
0876: SchemaField f = fields.get(fieldName);
0877: if (f != null)
0878: return f;
0879:
0880: for (DynamicField df : dynamicFields) {
0881: if (df.matches(fieldName))
0882: return df.makeSchemaField(fieldName);
0883: }
0884:
0885: return f;
0886: }
0887:
0888: /**
0889: * Returns the SchemaField that should be used for the specified field name
0890: *
0891: * @param fieldName may be an explicitly defined field, or a name that
0892: * matches a dynamic field.
0893: * @throws SolrException if no such field exists
0894: * @see #getFieldType
0895: */
0896: public SchemaField getField(String fieldName) {
0897: SchemaField f = fields.get(fieldName);
0898: if (f != null)
0899: return f;
0900:
0901: for (DynamicField df : dynamicFields) {
0902: if (df.matches(fieldName))
0903: return df.makeSchemaField(fieldName);
0904: }
0905:
0906: // Hmmm, default field could also be implemented with a dynamic field of "*".
0907: // It would have to be special-cased and only used if nothing else matched.
0908: /*** REMOVED -YCS
0909: if (defaultFieldType != null) return new SchemaField(fieldName,defaultFieldType);
0910: ***/
0911: throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
0912: "undefined field " + fieldName);
0913: }
0914:
0915: /**
0916: * Returns the FieldType for the specified field name.
0917: *
0918: * <p>
0919: * This method exists because it can be more efficient then
0920: * {@link #getField} for dynamic fields if a full SchemaField isn't needed.
0921: * </p>
0922: *
0923: * @param fieldName may be an explicitly created field, or a name that
0924: * excercies a dynamic field.
0925: * @throws SolrException if no such field exists
0926: * @see #getField(String)
0927: * @see #getFieldTypeNoEx
0928: */
0929: public FieldType getFieldType(String fieldName) {
0930: SchemaField f = fields.get(fieldName);
0931: if (f != null)
0932: return f.getType();
0933:
0934: return getDynamicFieldType(fieldName);
0935: }
0936:
0937: /**
0938: * Returns the FieldType for the specified field name.
0939: *
0940: * <p>
0941: * This method exists because it can be more efficient then
0942: * {@link #getField} for dynamic fields if a full SchemaField isn't needed.
0943: * </p>
0944: *
0945: * @param fieldName may be an explicitly created field, or a name that
0946: * excercies a dynamic field.
0947: * @return null if field is not defined.
0948: * @see #getField(String)
0949: * @see #getFieldTypeNoEx
0950: */
0951: public FieldType getFieldTypeNoEx(String fieldName) {
0952: SchemaField f = fields.get(fieldName);
0953: if (f != null)
0954: return f.getType();
0955: return dynFieldType(fieldName);
0956: }
0957:
0958: /**
0959: * Returns the FieldType of the best matching dynamic field for
0960: * the specified field name
0961: *
0962: * @param fieldName may be an explicitly created field, or a name that
0963: * excercies a dynamic field.
0964: * @throws SolrException if no such field exists
0965: * @see #getField(String)
0966: * @see #getFieldTypeNoEx
0967: */
0968: public FieldType getDynamicFieldType(String fieldName) {
0969: for (DynamicField df : dynamicFields) {
0970: if (df.matches(fieldName))
0971: return df.prototype.getType();
0972: }
0973: throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
0974: "undefined field " + fieldName);
0975: }
0976:
0977: private FieldType dynFieldType(String fieldName) {
0978: for (DynamicField df : dynamicFields) {
0979: if (df.matches(fieldName))
0980: return df.prototype.getType();
0981: }
0982: return null;
0983: };
0984:
0985: private final Map<String, SchemaField[]> copyFields = new HashMap<String, SchemaField[]>();
0986: private DynamicCopy[] dynamicCopyFields;
0987:
0988: /**
0989: * Get all copy fields, both the static and the dynamic ones.
0990: * @param sourceField
0991: * @return Array of fields to copy to.
0992: */
0993: public SchemaField[] getCopyFields(String sourceField) {
0994: // Get the dynamic ones into a list.
0995: List<SchemaField> matchCopyFields = new ArrayList<SchemaField>();
0996:
0997: for (DynamicCopy dynamicCopy : dynamicCopyFields) {
0998: if (dynamicCopy.matches(sourceField)) {
0999: matchCopyFields.add(dynamicCopy
1000: .getTargetField(sourceField));
1001: }
1002: }
1003:
1004: // Get the fixed ones, if there are any.
1005: SchemaField[] fixedCopyFields = copyFields.get(sourceField);
1006:
1007: boolean appendFixed = copyFields.containsKey(sourceField);
1008:
1009: // Construct the results by concatenating dynamic and fixed into a results array.
1010:
1011: SchemaField[] results = new SchemaField[matchCopyFields.size()
1012: + (appendFixed ? fixedCopyFields.length : 0)];
1013:
1014: matchCopyFields.toArray(results);
1015:
1016: if (appendFixed) {
1017: System.arraycopy(fixedCopyFields, 0, results,
1018: matchCopyFields.size(), fixedCopyFields.length);
1019: }
1020:
1021: return results;
1022: }
1023:
1024: /**
1025: * Is the given field name a wildcard? I.e. does it begin or end with *?
1026: * @param name
1027: * @return true/false
1028: */
1029: private static boolean isWildCard(String name) {
1030: return name.startsWith("*") || name.endsWith("*");
1031: }
1032:
1033: }
|