Source Code Cross Referenced for IndexSchema.java in  » Search-Engine » apache-solr-1.2.0 » org » apache » solr » schema » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Search Engine » apache solr 1.2.0 » org.apache.solr.schema 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /**
0002:         * Licensed to the Apache Software Foundation (ASF) under one or more
0003:         * contributor license agreements.  See the NOTICE file distributed with
0004:         * this work for additional information regarding copyright ownership.
0005:         * The ASF licenses this file to You under the Apache License, Version 2.0
0006:         * (the "License"); you may not use this file except in compliance with
0007:         * the License.  You may obtain a copy of the License at
0008:         *
0009:         *     http://www.apache.org/licenses/LICENSE-2.0
0010:         *
0011:         * Unless required by applicable law or agreed to in writing, software
0012:         * distributed under the License is distributed on an "AS IS" BASIS,
0013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014:         * See the License for the specific language governing permissions and
0015:         * limitations under the License.
0016:         */package org.apache.solr.schema;
0017:
0018:        import org.apache.lucene.analysis.Analyzer;
0019:        import org.apache.lucene.analysis.TokenStream;
0020:        import org.apache.lucene.document.Fieldable;
0021:        import org.apache.lucene.search.DefaultSimilarity;
0022:        import org.apache.lucene.search.Similarity;
0023:        import org.apache.lucene.queryParser.QueryParser;
0024:        import org.apache.solr.core.SolrConfig;
0025:        import org.apache.solr.core.SolrException;
0026:        import org.apache.solr.core.Config;
0027:        import org.apache.solr.analysis.TokenFilterFactory;
0028:        import org.apache.solr.analysis.TokenizerChain;
0029:        import org.apache.solr.analysis.TokenizerFactory;
0030:        import org.apache.solr.search.SolrQueryParser;
0031:        import org.apache.solr.util.DOMUtil;
0032:        import org.w3c.dom.Document;
0033:        import org.w3c.dom.NamedNodeMap;
0034:        import org.w3c.dom.Node;
0035:        import org.w3c.dom.NodeList;
0036:
0037:        import javax.xml.xpath.XPath;
0038:        import javax.xml.xpath.XPathConstants;
0039:        import javax.xml.xpath.XPathExpressionException;
0040:        import javax.xml.xpath.XPathFactory;
0041:        import java.io.InputStream;
0042:        import java.io.Reader;
0043:        import java.util.*;
0044:        import java.util.logging.Logger;
0045:
0046:        /**
0047:         * <code>IndexSchema</code> contains information about the valid fields in an index
0048:         * and the types of those fields.
0049:         *
0050:         * @author yonik
0051:         * @version $Id: IndexSchema.java 542679 2007-05-29 22:28:21Z ryan $
0052:         */
0053:
0054:        public final class IndexSchema {
0055:            final static Logger log = Logger.getLogger(IndexSchema.class
0056:                    .getName());
0057:
0058:            private final String schemaFile;
0059:            private String name;
0060:            private float version;
0061:
/**
 * Creates a schema for the named schema file and parses it immediately.
 * The file is opened through {@link #getInputStream()}, which delegates
 * to {@link Config#openResource}.
 *
 * @param schemaFile name of the schema resource to read
 * @see Config#openResource
 */
public IndexSchema(String schemaFile) {
    this.schemaFile = schemaFile;
    // Parse eagerly; readConfig() throws a SolrException if parsing fails.
    readConfig();
}
0072:
0073:            /**
0074:             * Direct acess to the InputStream for the schemaFile used by this instance.
0075:             *
0076:             * @see Config#openResource
0077:             */
0078:            public InputStream getInputStream() {
0079:                return Config.openResource(schemaFile);
0080:            }
0081:
0082:            float getVersion() {
0083:                return version;
0084:            }
0085:
0086:            /** The Name of this schema (as specified in the schema file) */
0087:            public String getName() {
0088:                return name;
0089:            }
0090:
0091:            private final HashMap<String, SchemaField> fields = new HashMap<String, SchemaField>();
0092:            private final HashMap<String, FieldType> fieldTypes = new HashMap<String, FieldType>();
0093:            private final List<SchemaField> fieldsWithDefaultValue = new ArrayList<SchemaField>();
0094:            private final Collection<SchemaField> requiredFields = new HashSet<SchemaField>();
0095:
0096:            /**
0097:             * Provides direct access to the Map containing all explicit
0098:             * (ie: non-dynamic) fields in the index, keyed on field name.
0099:             *
0100:             * <p>
0101:             * Modifying this Map (or any item in it) will affect the real schema
0102:             * </p>
0103:             */
0104:            public Map<String, SchemaField> getFields() {
0105:                return fields;
0106:            }
0107:
0108:            /**
0109:             * Provides direct access to the Map containing all Field Types
0110:             * in the index, keyed on fild type name.
0111:             *
0112:             * <p>
0113:             * Modifying this Map (or any item in it) will affect the real schema
0114:             * </p>
0115:             */
0116:            public Map<String, FieldType> getFieldTypes() {
0117:                return fieldTypes;
0118:            }
0119:
0120:            /**
0121:             * Provides direct access to the List containing all fields with a default value
0122:             */
0123:            public List<SchemaField> getFieldsWithDefaultValue() {
0124:                return fieldsWithDefaultValue;
0125:            }
0126:
0127:            /**
0128:             * Provides direct access to the List containing all required fields.  This
0129:             * list contains all fields with default values.
0130:             */
0131:            public Collection<SchemaField> getRequiredFields() {
0132:                return requiredFields;
0133:            }
0134:
0135:            private Similarity similarity;
0136:
0137:            /**
0138:             * Returns the Similarity used for this index
0139:             */
0140:            public Similarity getSimilarity() {
0141:                return similarity;
0142:            }
0143:
0144:            private Analyzer analyzer;
0145:
0146:            /**
0147:             * Returns the Analyzer used when indexing documents for this index
0148:             *
0149:             * <p>
0150:             * This Analyzer is field (and dynamic field) name aware, and delegates to
0151:             * a field specific Analyzer based on the field type.
0152:             * </p>
0153:             */
0154:            public Analyzer getAnalyzer() {
0155:                return analyzer;
0156:            }
0157:
0158:            private Analyzer queryAnalyzer;
0159:
0160:            /**
0161:             * Returns the Analyzer used when searching this index
0162:             *
0163:             * <p>
0164:             * This Analyzer is field (and dynamic field) name aware, and delegates to
0165:             * a field specific Analyzer based on the field type.
0166:             * </p>
0167:             */
0168:            public Analyzer getQueryAnalyzer() {
0169:                return queryAnalyzer;
0170:            }
0171:
0172:            private String defaultSearchFieldName = null;
0173:            private String queryParserDefaultOperator = "OR";
0174:
0175:            /**
0176:             * A SolrQueryParser linked to this IndexSchema for field datatype
0177:             * information, and populated with default options from the
0178:             * &lt;solrQueryParser&gt; configuration for this IndexSchema.
0179:             *
0180:             * @param defaultField if non-null overrides the schema default
0181:             */
0182:            public SolrQueryParser getSolrQueryParser(String defaultField) {
0183:                SolrQueryParser qp = new SolrQueryParser(this , defaultField);
0184:                String operator = getQueryParserDefaultOperator();
0185:                qp
0186:                        .setDefaultOperator("AND".equals(operator) ? QueryParser.Operator.AND
0187:                                : QueryParser.Operator.OR);
0188:                return qp;
0189:            }
0190:
0191:            /**
0192:             * Name of the default search field specified in the schema file
0193:             * @deprecated use getSolrQueryParser().getField()
0194:             */
0195:            public String getDefaultSearchFieldName() {
0196:                return defaultSearchFieldName;
0197:            }
0198:
0199:            /**
0200:             * default operator ("AND" or "OR") for QueryParser
0201:             * @deprecated use getSolrQueryParser().getDefaultOperator()
0202:             */
0203:            public String getQueryParserDefaultOperator() {
0204:                return queryParserDefaultOperator;
0205:            }
0206:
0207:            private SchemaField uniqueKeyField;
0208:
0209:            /**
0210:             * Unique Key field specified in the schema file
0211:             * @return null if this schema has no unique key field
0212:             */
0213:            public SchemaField getUniqueKeyField() {
0214:                return uniqueKeyField;
0215:            }
0216:
0217:            private String uniqueKeyFieldName;
0218:            private FieldType uniqueKeyFieldType;
0219:
0220:            /**
0221:             * The raw (field type encoded) value of the Unique Key field for
0222:             * the specified Document
0223:             * @return null if this schema has no unique key field
0224:             * @see #printableUniqueKey
0225:             */
0226:            public Fieldable getUniqueKeyField(
0227:                    org.apache.lucene.document.Document doc) {
0228:                return doc.getFieldable(uniqueKeyFieldName); // this should return null if name is null
0229:            }
0230:
0231:            /**
0232:             * The printable value of the Unique Key field for
0233:             * the specified Document
0234:             * @return null if this schema has no unique key field
0235:             */
0236:            public String printableUniqueKey(
0237:                    org.apache.lucene.document.Document doc) {
0238:                Fieldable f = doc.getFieldable(uniqueKeyFieldName);
0239:                return f == null ? null : uniqueKeyFieldType.toExternal(f);
0240:            }
0241:
0242:            private SchemaField getIndexedField(String fname) {
0243:                SchemaField f = getFields().get(fname);
0244:                if (f == null) {
0245:                    throw new RuntimeException("unknown field '" + fname + "'");
0246:                }
0247:                if (!f.indexed()) {
0248:                    throw new RuntimeException("'" + fname
0249:                            + "' is not an indexed field:" + f);
0250:                }
0251:                return f;
0252:            }
0253:
0254:            private class SolrIndexAnalyzer extends Analyzer {
0255:                protected final HashMap<String, Analyzer> analyzers;
0256:
0257:                SolrIndexAnalyzer() {
0258:                    analyzers = analyzerCache();
0259:                }
0260:
0261:                protected HashMap<String, Analyzer> analyzerCache() {
0262:                    HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
0263:                    for (SchemaField f : getFields().values()) {
0264:                        Analyzer analyzer = f.getType().getAnalyzer();
0265:                        cache.put(f.getName(), analyzer);
0266:                    }
0267:                    return cache;
0268:                }
0269:
0270:                protected Analyzer getAnalyzer(String fieldName) {
0271:                    Analyzer analyzer = analyzers.get(fieldName);
0272:                    return analyzer != null ? analyzer : getDynamicFieldType(
0273:                            fieldName).getAnalyzer();
0274:                }
0275:
0276:                public TokenStream tokenStream(String fieldName, Reader reader) {
0277:                    return getAnalyzer(fieldName)
0278:                            .tokenStream(fieldName, reader);
0279:                }
0280:
0281:                public int getPositionIncrementGap(String fieldName) {
0282:                    return getAnalyzer(fieldName).getPositionIncrementGap(
0283:                            fieldName);
0284:                }
0285:            }
0286:
0287:            private class SolrQueryAnalyzer extends SolrIndexAnalyzer {
0288:                protected HashMap<String, Analyzer> analyzerCache() {
0289:                    HashMap<String, Analyzer> cache = new HashMap<String, Analyzer>();
0290:                    for (SchemaField f : getFields().values()) {
0291:                        Analyzer analyzer = f.getType().getQueryAnalyzer();
0292:                        cache.put(f.getName(), analyzer);
0293:                    }
0294:                    return cache;
0295:                }
0296:
0297:                protected Analyzer getAnalyzer(String fieldName) {
0298:                    Analyzer analyzer = analyzers.get(fieldName);
0299:                    return analyzer != null ? analyzer : getDynamicFieldType(
0300:                            fieldName).getQueryAnalyzer();
0301:                }
0302:            }
0303:
0304:            private void readConfig() {
0305:                log.info("Reading Solr Schema");
0306:
0307:                try {
0308:                    /***
0309:                    DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
0310:                    Document document = builder.parse(getInputStream());
0311:                     ***/
0312:
0313:                    Config config = new Config("schema", getInputStream(),
0314:                            "/schema/");
0315:                    Document document = config.getDocument();
0316:                    XPath xpath = config.getXPath();
0317:
0318:                    Node nd = (Node) xpath.evaluate("/schema/@name", document,
0319:                            XPathConstants.NODE);
0320:                    if (nd == null) {
0321:                        log.warning("schema has no name!");
0322:                    } else {
0323:                        name = nd.getNodeValue();
0324:                        log.info("Schema name=" + name);
0325:                    }
0326:
0327:                    version = config.getFloat("/schema/@version", 1.0f);
0328:
0329:                    String expression = "/schema/types/fieldtype | /schema/types/fieldType";
0330:                    NodeList nodes = (NodeList) xpath.evaluate(expression,
0331:                            document, XPathConstants.NODESET);
0332:
0333:                    for (int i = 0; i < nodes.getLength(); i++) {
0334:                        Node node = nodes.item(i);
0335:                        NamedNodeMap attrs = node.getAttributes();
0336:
0337:                        String name = DOMUtil.getAttr(attrs, "name",
0338:                                "fieldtype error");
0339:                        log.finest("reading fieldtype " + name);
0340:                        String clsName = DOMUtil.getAttr(attrs, "class",
0341:                                "fieldtype error");
0342:                        FieldType ft = (FieldType) Config.newInstance(clsName);
0343:                        ft.setTypeName(name);
0344:
0345:                        expression = "./analyzer[@type='query']";
0346:                        Node anode = (Node) xpath.evaluate(expression, node,
0347:                                XPathConstants.NODE);
0348:                        Analyzer queryAnalyzer = readAnalyzer(anode);
0349:
0350:                        // An analyzer without a type specified, or with type="index"
0351:                        expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
0352:                        anode = (Node) xpath.evaluate(expression, node,
0353:                                XPathConstants.NODE);
0354:                        Analyzer analyzer = readAnalyzer(anode);
0355:
0356:                        if (queryAnalyzer == null)
0357:                            queryAnalyzer = analyzer;
0358:                        if (analyzer == null)
0359:                            analyzer = queryAnalyzer;
0360:                        if (analyzer != null) {
0361:                            ft.setAnalyzer(analyzer);
0362:                            ft.setQueryAnalyzer(queryAnalyzer);
0363:                        }
0364:
0365:                        ft.setArgs(this , DOMUtil.toMapExcept(attrs, "name",
0366:                                "class"));
0367:                        FieldType old = fieldTypes.put(ft.typeName, ft);
0368:                        if (old != null) {
0369:                            String msg = "[schema.xml] Duplicate fieldType definition for '"
0370:                                    + ft.typeName
0371:                                    + "' ignoring: "
0372:                                    + old.toString();
0373:
0374:                            Throwable t = new SolrException(
0375:                                    SolrException.ErrorCode.SERVER_ERROR, msg);
0376:                            SolrException.logOnce(log, null, t);
0377:                            SolrConfig.severeErrors.add(t);
0378:                        }
0379:                        log.finest("fieldtype defined: " + ft);
0380:                    }
0381:
0382:                    // Hang on to the fields that say if they are required -- this lets us set a reasonable default for the unique key
0383:                    Map<String, Boolean> explicitRequiredProp = new HashMap<String, Boolean>();
0384:                    ArrayList<DynamicField> dFields = new ArrayList<DynamicField>();
0385:                    expression = "/schema/fields/field | /schema/fields/dynamicField";
0386:                    nodes = (NodeList) xpath.evaluate(expression, document,
0387:                            XPathConstants.NODESET);
0388:
0389:                    for (int i = 0; i < nodes.getLength(); i++) {
0390:                        Node node = nodes.item(i);
0391:
0392:                        NamedNodeMap attrs = node.getAttributes();
0393:
0394:                        String name = DOMUtil.getAttr(attrs, "name",
0395:                                "field definition");
0396:                        log.finest("reading field def " + name);
0397:                        String type = DOMUtil.getAttr(attrs, "type", "field "
0398:                                + name);
0399:                        String val;
0400:
0401:                        FieldType ft = fieldTypes.get(type);
0402:                        if (ft == null) {
0403:                            throw new SolrException(
0404:                                    SolrException.ErrorCode.BAD_REQUEST,
0405:                                    "Unknown fieldtype '" + type + "'", false);
0406:                        }
0407:
0408:                        Map<String, String> args = DOMUtil.toMapExcept(attrs,
0409:                                "name", "type");
0410:                        if (args.get("required") != null) {
0411:                            explicitRequiredProp.put(name, Boolean.valueOf(args
0412:                                    .get("required")));
0413:                        }
0414:
0415:                        SchemaField f = SchemaField.create(name, ft, args);
0416:
0417:                        if (node.getNodeName().equals("field")) {
0418:                            SchemaField old = fields.put(f.getName(), f);
0419:                            if (old != null) {
0420:                                String msg = "[schema.xml] Duplicate field definition for '"
0421:                                        + f.getName()
0422:                                        + "' ignoring: "
0423:                                        + old.toString();
0424:
0425:                                Throwable t = new SolrException(
0426:                                        SolrException.ErrorCode.SERVER_ERROR,
0427:                                        msg);
0428:                                SolrException.logOnce(log, null, t);
0429:                                SolrConfig.severeErrors.add(t);
0430:                            }
0431:
0432:                            log.fine("field defined: " + f);
0433:                            if (f.getDefaultValue() != null) {
0434:                                log.fine(name + " contains default value: "
0435:                                        + f.getDefaultValue());
0436:                                fieldsWithDefaultValue.add(f);
0437:                            }
0438:                            if (f.isRequired()) {
0439:                                log.fine(name + " is required in this schema");
0440:                                requiredFields.add(f);
0441:                            }
0442:                        } else if (node.getNodeName().equals("dynamicField")) {
0443:                            // make sure nothing else has the same path
0444:                            boolean dup = false;
0445:                            for (DynamicField df : dFields) {
0446:                                if (df.regex.equals(f.name)) {
0447:                                    String msg = "[schema.xml] Duplicate DynamicField definition for '"
0448:                                            + f.getName()
0449:                                            + "' ignoring: "
0450:                                            + f.toString();
0451:
0452:                                    Throwable t = new SolrException(
0453:                                            SolrException.ErrorCode.SERVER_ERROR,
0454:                                            msg);
0455:                                    SolrException.logOnce(log, null, t);
0456:                                    SolrConfig.severeErrors.add(t);
0457:                                    dup = true;
0458:                                    break;
0459:                                }
0460:                            }
0461:                            if (!dup) {
0462:                                dFields.add(new DynamicField(f));
0463:                                log.fine("dynamic field defined: " + f);
0464:                            }
0465:                        } else {
0466:                            // we should never get here
0467:                            throw new RuntimeException("Unknown field type");
0468:                        }
0469:                    }
0470:
0471:                    //fields with default values are by definition required
0472:                    //add them to required fields, and we only have to loop once
0473:                    // in DocumentBuilder.getDoc()
0474:                    requiredFields.addAll(getFieldsWithDefaultValue());
0475:
0476:                    // OK, now sort the dynamic fields largest to smallest size so we don't get
0477:                    // any false matches.  We want to act like a compiler tool and try and match
0478:                    // the largest string possible.
0479:                    Collections.sort(dFields);
0480:
0481:                    log.finest("Dynamic Field Ordering:" + dFields);
0482:
0483:                    // stuff it in a normal array for faster access
0484:                    dynamicFields = (DynamicField[]) dFields
0485:                            .toArray(new DynamicField[dFields.size()]);
0486:
0487:                    Node node = (Node) xpath.evaluate(
0488:                            "/schema/similarity/@class", document,
0489:                            XPathConstants.NODE);
0490:                    if (node == null) {
0491:                        similarity = new DefaultSimilarity();
0492:                        log.fine("using default similarity");
0493:                    } else {
0494:                        similarity = (Similarity) Config.newInstance(node
0495:                                .getNodeValue().trim());
0496:                        log.fine("using similarity "
0497:                                + similarity.getClass().getName());
0498:                    }
0499:
0500:                    node = (Node) xpath.evaluate(
0501:                            "/schema/defaultSearchField/text()", document,
0502:                            XPathConstants.NODE);
0503:                    if (node == null) {
0504:                        log
0505:                                .warning("no default search field specified in schema.");
0506:                    } else {
0507:                        defaultSearchFieldName = node.getNodeValue().trim();
0508:                        // throw exception if specified, but not found or not indexed
0509:                        if (defaultSearchFieldName != null)
0510:                            getIndexedField(defaultSearchFieldName);
0511:                        log.info("default search field is "
0512:                                + defaultSearchFieldName);
0513:                    }
0514:
0515:                    node = (Node) xpath.evaluate(
0516:                            "/schema/solrQueryParser/@defaultOperator",
0517:                            document, XPathConstants.NODE);
0518:                    if (node == null) {
0519:                        log.fine("using default query parser operator (OR)");
0520:                    } else {
0521:                        queryParserDefaultOperator = node.getNodeValue().trim();
0522:                        log.info("query parser default operator is "
0523:                                + queryParserDefaultOperator);
0524:                    }
0525:
0526:                    node = (Node) xpath.evaluate("/schema/uniqueKey/text()",
0527:                            document, XPathConstants.NODE);
0528:                    if (node == null) {
0529:                        log.warning("no uniqueKey specified in schema.");
0530:                    } else {
0531:                        uniqueKeyField = getIndexedField(node.getNodeValue()
0532:                                .trim());
0533:                        uniqueKeyFieldName = uniqueKeyField.getName();
0534:                        uniqueKeyFieldType = uniqueKeyField.getType();
0535:                        log.info("unique key field: " + uniqueKeyFieldName);
0536:
0537:                        // Unless the uniqueKeyField is marked 'required=false' then make sure it exists
0538:                        if (Boolean.FALSE != explicitRequiredProp
0539:                                .get(uniqueKeyFieldName)) {
0540:                            uniqueKeyField.required = true;
0541:                            requiredFields.add(uniqueKeyField);
0542:                        }
0543:                    }
0544:
0545:                    /////////////// parse out copyField commands ///////////////
0546:                    // Map<String,ArrayList<SchemaField>> cfields = new HashMap<String,ArrayList<SchemaField>>();
0547:                    // expression = "/schema/copyField";
0548:
0549:                    ArrayList<DynamicCopy> dCopies = new ArrayList<DynamicCopy>();
0550:
0551:                    expression = "//copyField";
0552:                    nodes = (NodeList) xpath.evaluate(expression, document,
0553:                            XPathConstants.NODESET);
0554:
0555:                    for (int i = 0; i < nodes.getLength(); i++) {
0556:                        node = nodes.item(i);
0557:                        NamedNodeMap attrs = node.getAttributes();
0558:
0559:                        String source = DOMUtil.getAttr(attrs, "source",
0560:                                "copyField definition");
0561:                        String dest = DOMUtil.getAttr(attrs, "dest",
0562:                                "copyField definition");
0563:
0564:                        boolean sourceIsPattern = isWildCard(source);
0565:                        boolean destIsPattern = isWildCard(dest);
0566:
0567:                        log.fine("copyField source='" + source + "' dest='"
0568:                                + dest + "'");
0569:                        SchemaField d = getField(dest);
0570:
0571:                        if (sourceIsPattern) {
0572:                            if (destIsPattern) {
0573:                                DynamicField df = null;
0574:                                for (DynamicField dd : dynamicFields) {
0575:                                    if (dd.regex.equals(dest)) {
0576:                                        df = dd;
0577:                                        break;
0578:                                    }
0579:                                }
0580:                                if (df == null) {
0581:                                    throw new SolrException(
0582:                                            SolrException.ErrorCode.SERVER_ERROR,
0583:                                            "copyField dynamic destination must match a dynamicField.");
0584:                                }
0585:                                dCopies.add(new DynamicDestCopy(source, df));
0586:                            } else {
0587:                                dCopies.add(new DynamicCopy(source, d));
0588:                            }
0589:                        } else if (destIsPattern) {
0590:                            String msg = "copyField only supports a dynamic destination if the source is also dynamic";
0591:                            throw new SolrException(
0592:                                    SolrException.ErrorCode.SERVER_ERROR, msg);
0593:                        } else {
0594:                            // retrieve the field to force an exception if it doesn't exist
0595:                            SchemaField f = getField(source);
0596:
0597:                            SchemaField[] destArr = copyFields.get(source);
0598:                            if (destArr == null) {
0599:                                destArr = new SchemaField[] { d };
0600:                            } else {
0601:                                destArr = (SchemaField[]) append(destArr, d);
0602:                            }
0603:                            copyFields.put(source, destArr);
0604:                        }
0605:                    }
0606:
0607:                    log.finest("Dynamic Copied Fields:" + dCopies);
0608:
0609:                    // stuff it in a normal array for faster access
0610:                    dynamicCopyFields = (DynamicCopy[]) dCopies
0611:                            .toArray(new DynamicCopy[dCopies.size()]);
0612:
0613:                } catch (SolrException e) {
0614:                    SolrConfig.severeErrors.add(e);
0615:                    throw e;
0616:                } catch (Exception e) {
0617:                    // unexpected exception...
0618:                    SolrConfig.severeErrors.add(e);
0619:                    throw new SolrException(
0620:                            SolrException.ErrorCode.SERVER_ERROR,
0621:                            "Schema Parsing Failed", e, false);
0622:                }
0623:
0624:                analyzer = new SolrIndexAnalyzer();
0625:                queryAnalyzer = new SolrQueryAnalyzer();
0626:            }
0627:
0628:            private static Object[] append(Object[] orig, Object item) {
0629:                Object[] newArr = (Object[]) java.lang.reflect.Array
0630:                        .newInstance(orig.getClass().getComponentType(),
0631:                                orig.length + 1);
0632:                System.arraycopy(orig, 0, newArr, 0, orig.length);
0633:                newArr[orig.length] = item;
0634:                return newArr;
0635:            }
0636:
0637:            //
0638:            // <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
0639:            //
0640:            //
0641:            private Analyzer readAnalyzer(Node node)
0642:                    throws XPathExpressionException {
0643:                // parent node used to be passed in as "fieldtype"
0644:                // if (!fieldtype.hasChildNodes()) return null;
0645:                // Node node = DOMUtil.getChild(fieldtype,"analyzer");
0646:
0647:                if (node == null)
0648:                    return null;
0649:                NamedNodeMap attrs = node.getAttributes();
0650:                String analyzerName = DOMUtil.getAttr(attrs, "class");
0651:                if (analyzerName != null) {
0652:                    return (Analyzer) Config.newInstance(analyzerName);
0653:                }
0654:
0655:                XPath xpath = XPathFactory.newInstance().newXPath();
0656:                Node tokNode = (Node) xpath.evaluate("./tokenizer", node,
0657:                        XPathConstants.NODE);
0658:                NodeList nList = (NodeList) xpath.evaluate("./filter", node,
0659:                        XPathConstants.NODESET);
0660:
0661:                if (tokNode == null) {
0662:                    throw new SolrException(
0663:                            SolrException.ErrorCode.SERVER_ERROR,
0664:                            "analyzer without class or tokenizer & filter list");
0665:                }
0666:                TokenizerFactory tfac = readTokenizerFactory(tokNode);
0667:
0668:                /******
0669:                // oops, getChildNodes() includes text (newlines, etc) in addition
0670:                // to the actual child elements
0671:                NodeList nList = node.getChildNodes();
0672:                TokenizerFactory tfac = readTokenizerFactory(nList.item(0));
0673:                 if (tfac==null) {
0674:                   throw new SolrException( SolrException.StatusCode.SERVER_ERROR,"TokenizerFactory must be specified first in analyzer");
0675:                 }
0676:                 ******/
0677:
0678:                ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>();
0679:                for (int i = 0; i < nList.getLength(); i++) {
0680:                    TokenFilterFactory filt = readTokenFilterFactory(nList
0681:                            .item(i));
0682:                    if (filt != null)
0683:                        filters.add(filt);
0684:                }
0685:
0686:                return new TokenizerChain(tfac, filters
0687:                        .toArray(new TokenFilterFactory[filters.size()]));
0688:            };
0689:
0690:            // <tokenizer class="solr.StandardFilterFactory"/>
0691:            private TokenizerFactory readTokenizerFactory(Node node) {
0692:                // if (node.getNodeName() != "tokenizer") return null;
0693:                NamedNodeMap attrs = node.getAttributes();
0694:                String className = DOMUtil.getAttr(attrs, "class", "tokenizer");
0695:                TokenizerFactory tfac = (TokenizerFactory) Config
0696:                        .newInstance(className);
0697:                tfac.init(DOMUtil.toMapExcept(attrs, "class"));
0698:                return tfac;
0699:            }
0700:
0701:            // <tokenizer class="solr.StandardFilterFactory"/>
0702:            private TokenFilterFactory readTokenFilterFactory(Node node) {
0703:                // if (node.getNodeName() != "filter") return null;
0704:                NamedNodeMap attrs = node.getAttributes();
0705:                String className = DOMUtil.getAttr(attrs, "class",
0706:                        "token filter");
0707:                TokenFilterFactory tfac = (TokenFilterFactory) Config
0708:                        .newInstance(className);
0709:                tfac.init(DOMUtil.toMapExcept(attrs, "class"));
0710:                return tfac;
0711:            }
0712:
0713:            static abstract class DynamicReplacement implements 
0714:                    Comparable<DynamicReplacement> {
0715:                final static int STARTS_WITH = 1;
0716:                final static int ENDS_WITH = 2;
0717:
0718:                final String regex;
0719:                final int type;
0720:
0721:                final String str;
0722:
0723:                protected DynamicReplacement(String regex) {
0724:                    this .regex = regex;
0725:                    if (regex.startsWith("*")) {
0726:                        type = ENDS_WITH;
0727:                        str = regex.substring(1);
0728:                    } else if (regex.endsWith("*")) {
0729:                        type = STARTS_WITH;
0730:                        str = regex.substring(0, regex.length() - 1);
0731:                    } else {
0732:                        throw new RuntimeException(
0733:                                "dynamic field name must start or end with *");
0734:                    }
0735:                }
0736:
0737:                public boolean matches(String name) {
0738:                    if (type == STARTS_WITH && name.startsWith(str))
0739:                        return true;
0740:                    else if (type == ENDS_WITH && name.endsWith(str))
0741:                        return true;
0742:                    else
0743:                        return false;
0744:                }
0745:
0746:                /**
0747:                 * Sort order is based on length of regex.  Longest comes first.
0748:                 * @param other The object to compare to.
0749:                 * @return a negative integer, zero, or a positive integer
0750:                 * as this object is less than, equal to, or greater than
0751:                 * the specified object.
0752:                 */
0753:                public int compareTo(DynamicReplacement other) {
0754:                    return other.regex.length() - regex.length();
0755:                }
0756:            }
0757:
0758:            //
0759:            // Instead of storing a type, this could be implemented as a hierarchy
0760:            // with a virtual matches().
0761:            // Given how often a search will be done, however, speed is the overriding
0762:            // concern and I'm not sure which is faster.
0763:            //
0764:            final static class DynamicField extends DynamicReplacement {
0765:                final SchemaField prototype;
0766:
0767:                DynamicField(SchemaField prototype) {
0768:                    super (prototype.name);
0769:                    this .prototype = prototype;
0770:                }
0771:
0772:                SchemaField makeSchemaField(String name) {
0773:                    // could have a cache instead of returning a new one each time, but it might
0774:                    // not be worth it.
0775:                    // Actually, a higher level cache could be worth it to avoid too many
0776:                    // .startsWith() and .endsWith() comparisons.  it depends on how many
0777:                    // dynamic fields there are.
0778:                    return new SchemaField(prototype, name);
0779:                }
0780:
0781:                public String toString() {
0782:                    return prototype.toString();
0783:                }
0784:            }
0785:
0786:            static class DynamicCopy extends DynamicReplacement {
0787:                final SchemaField targetField;
0788:
0789:                DynamicCopy(String regex, SchemaField targetField) {
0790:                    super (regex);
0791:                    this .targetField = targetField;
0792:                }
0793:
0794:                public SchemaField getTargetField(String sourceField) {
0795:                    return targetField;
0796:                }
0797:
0798:                @Override
0799:                public String toString() {
0800:                    return targetField.toString();
0801:                }
0802:            }
0803:
0804:            static class DynamicDestCopy extends DynamicCopy {
0805:                final DynamicField dynamic;
0806:
0807:                final int dtype;
0808:                final String dstr;
0809:
0810:                DynamicDestCopy(String source, DynamicField dynamic) {
0811:                    super (source, dynamic.prototype);
0812:                    this .dynamic = dynamic;
0813:
0814:                    String dest = dynamic.regex;
0815:                    if (dest.startsWith("*")) {
0816:                        dtype = ENDS_WITH;
0817:                        dstr = dest.substring(1);
0818:                    } else if (dest.endsWith("*")) {
0819:                        dtype = STARTS_WITH;
0820:                        dstr = dest.substring(0, dest.length() - 1);
0821:                    } else {
0822:                        throw new RuntimeException(
0823:                                "dynamic copyField destination name must start or end with *");
0824:                    }
0825:                }
0826:
0827:                @Override
0828:                public SchemaField getTargetField(String sourceField) {
0829:                    String dyn = (type == STARTS_WITH) ? sourceField
0830:                            .substring(str.length()) : sourceField.substring(0,
0831:                            sourceField.length() - str.length());
0832:
0833:                    String name = (dtype == STARTS_WITH) ? (dstr + dyn)
0834:                            : (dyn + dstr);
0835:                    return dynamic.makeSchemaField(name);
0836:                }
0837:
0838:                @Override
0839:                public String toString() {
0840:                    return targetField.toString();
0841:                }
0842:            }
0843:
            // Wildcard field declarations. The lookup methods below scan this array
            // in order and use the first entry whose pattern matches.
            // NOTE(review): populated outside this section; presumably sorted
            // longest-pattern-first via DynamicReplacement.compareTo - confirm.
            private DynamicField[] dynamicFields;
0845:
0846:            /**
0847:             * Does the schema have the specified field defined explicitly, i.e.
0848:             * not as a result of a copyField declaration with a wildcard?  We
0849:             * consider it explicitly defined if it matches a field or dynamicField
0850:             * declaration.
0851:             * @param fieldName
0852:             * @return true if explicitly declared in the schema.
0853:             */
0854:            public boolean hasExplicitField(String fieldName) {
0855:                if (fields.containsKey(fieldName)) {
0856:                    return true;
0857:                }
0858:
0859:                for (DynamicField df : dynamicFields) {
0860:                    if (df.matches(fieldName))
0861:                        return true;
0862:                }
0863:
0864:                return false;
0865:            }
0866:
0867:            /**
0868:             * Returns the SchemaField that should be used for the specified field name, or
0869:             * null if none exists.
0870:             *
0871:             * @param fieldName may be an explicitly defined field, or a name that
0872:             * matches a dynamic field.
0873:             * @see #getFieldType
0874:             */
0875:            public SchemaField getFieldOrNull(String fieldName) {
0876:                SchemaField f = fields.get(fieldName);
0877:                if (f != null)
0878:                    return f;
0879:
0880:                for (DynamicField df : dynamicFields) {
0881:                    if (df.matches(fieldName))
0882:                        return df.makeSchemaField(fieldName);
0883:                }
0884:
0885:                return f;
0886:            }
0887:
0888:            /**
0889:             * Returns the SchemaField that should be used for the specified field name
0890:             *
0891:             * @param fieldName may be an explicitly defined field, or a name that
0892:             * matches a dynamic field.
0893:             * @throws SolrException if no such field exists
0894:             * @see #getFieldType
0895:             */
0896:            public SchemaField getField(String fieldName) {
0897:                SchemaField f = fields.get(fieldName);
0898:                if (f != null)
0899:                    return f;
0900:
0901:                for (DynamicField df : dynamicFields) {
0902:                    if (df.matches(fieldName))
0903:                        return df.makeSchemaField(fieldName);
0904:                }
0905:
0906:                // Hmmm, default field could also be implemented with a dynamic field of "*".
0907:                // It would have to be special-cased and only used if nothing else matched.
0908:                /***  REMOVED -YCS
0909:                if (defaultFieldType != null) return new SchemaField(fieldName,defaultFieldType);
0910:                 ***/
0911:                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
0912:                        "undefined field " + fieldName);
0913:            }
0914:
0915:            /**
0916:             * Returns the FieldType for the specified field name.
0917:             *
0918:             * <p>
0919:             * This method exists because it can be more efficient then
0920:             * {@link #getField} for dynamic fields if a full SchemaField isn't needed.
0921:             * </p>
0922:             *
0923:             * @param fieldName may be an explicitly created field, or a name that
0924:             * excercies a dynamic field.
0925:             * @throws SolrException if no such field exists
0926:             * @see #getField(String)
0927:             * @see #getFieldTypeNoEx
0928:             */
0929:            public FieldType getFieldType(String fieldName) {
0930:                SchemaField f = fields.get(fieldName);
0931:                if (f != null)
0932:                    return f.getType();
0933:
0934:                return getDynamicFieldType(fieldName);
0935:            }
0936:
0937:            /**
0938:             * Returns the FieldType for the specified field name.
0939:             *
0940:             * <p>
0941:             * This method exists because it can be more efficient then
0942:             * {@link #getField} for dynamic fields if a full SchemaField isn't needed.
0943:             * </p>
0944:             *
0945:             * @param fieldName may be an explicitly created field, or a name that
0946:             * excercies a dynamic field.
0947:             * @return null if field is not defined.
0948:             * @see #getField(String)
0949:             * @see #getFieldTypeNoEx
0950:             */
0951:            public FieldType getFieldTypeNoEx(String fieldName) {
0952:                SchemaField f = fields.get(fieldName);
0953:                if (f != null)
0954:                    return f.getType();
0955:                return dynFieldType(fieldName);
0956:            }
0957:
0958:            /**
0959:             * Returns the FieldType of the best matching dynamic field for
0960:             * the specified field name
0961:             *
0962:             * @param fieldName may be an explicitly created field, or a name that
0963:             * excercies a dynamic field.
0964:             * @throws SolrException if no such field exists
0965:             * @see #getField(String)
0966:             * @see #getFieldTypeNoEx
0967:             */
0968:            public FieldType getDynamicFieldType(String fieldName) {
0969:                for (DynamicField df : dynamicFields) {
0970:                    if (df.matches(fieldName))
0971:                        return df.prototype.getType();
0972:                }
0973:                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
0974:                        "undefined field " + fieldName);
0975:            }
0976:
0977:            private FieldType dynFieldType(String fieldName) {
0978:                for (DynamicField df : dynamicFields) {
0979:                    if (df.matches(fieldName))
0980:                        return df.prototype.getType();
0981:                }
0982:                return null;
0983:            };
0984:
            // Copy rules with a literal (non-wildcard) source: maps the source field
            // name to the array of destination fields it is copied to.
            private final Map<String, SchemaField[]> copyFields = new HashMap<String, SchemaField[]>();
            // Copy rules with a wildcard source; assigned after the copyField
            // declarations of the schema have been read.
            private DynamicCopy[] dynamicCopyFields;
0987:
0988:            /**
0989:             * Get all copy fields, both the static and the dynamic ones.
0990:             * @param sourceField
0991:             * @return Array of fields to copy to.
0992:             */
0993:            public SchemaField[] getCopyFields(String sourceField) {
0994:                // Get the dynamic ones into a list.
0995:                List<SchemaField> matchCopyFields = new ArrayList<SchemaField>();
0996:
0997:                for (DynamicCopy dynamicCopy : dynamicCopyFields) {
0998:                    if (dynamicCopy.matches(sourceField)) {
0999:                        matchCopyFields.add(dynamicCopy
1000:                                .getTargetField(sourceField));
1001:                    }
1002:                }
1003:
1004:                // Get the fixed ones, if there are any.
1005:                SchemaField[] fixedCopyFields = copyFields.get(sourceField);
1006:
1007:                boolean appendFixed = copyFields.containsKey(sourceField);
1008:
1009:                // Construct the results by concatenating dynamic and fixed into a results array.
1010:
1011:                SchemaField[] results = new SchemaField[matchCopyFields.size()
1012:                        + (appendFixed ? fixedCopyFields.length : 0)];
1013:
1014:                matchCopyFields.toArray(results);
1015:
1016:                if (appendFixed) {
1017:                    System.arraycopy(fixedCopyFields, 0, results,
1018:                            matchCopyFields.size(), fixedCopyFields.length);
1019:                }
1020:
1021:                return results;
1022:            }
1023:
1024:            /**
1025:             * Is the given field name a wildcard?  I.e. does it begin or end with *?
1026:             * @param name
1027:             * @return true/false
1028:             */
1029:            private static boolean isWildCard(String name) {
1030:                return name.startsWith("*") || name.endsWith("*");
1031:            }
1032:
1033:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.