0001: /*
0002: * Copyright 2004-2006 the original author or authors.
0003: *
0004: * Licensed under the Apache License, Version 2.0 (the "License");
0005: * you may not use this file except in compliance with the License.
0006: * You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016:
0017: package org.compass.core.lucene;
0018:
0019: import org.apache.lucene.store.jdbc.JdbcFileEntrySettings;
0020: import org.apache.lucene.store.jdbc.index.JdbcBufferedIndexInput;
0021: import org.apache.lucene.store.jdbc.index.JdbcBufferedIndexOutput;
0022: import org.apache.lucene.store.jdbc.index.RAMAndFileJdbcIndexOutput;
0023:
0024: /**
0025: * @author kimchy
0026: */
0027: public class LuceneEnvironment {
0028:
/**
 * Setting key for the default search that will be used for non-prefixed
 * query values. Defaults to the value of the "all" property.
 */
public static final String DEFAULT_SEARCH = "compass.engine.defaultsearch";
0034:
0035: public static abstract class Analyzer {
0036:
0037: /**
0038: * The prefix used for analyzer groups.
0039: */
0040: public static final String PREFIX = "compass.engine.analyzer";
0041:
0042: /**
0043: * The default anayzer group that must be set.
0044: */
0045: public static final String DEFAULT_GROUP = "default";
0046:
0047: /**
0048: * An optional analyzer group name that can be set, will be used when
0049: * searching.
0050: */
0051: public static final String SEARCH_GROUP = "search";
0052:
0053: /**
0054: * The name of the analyzer to use, can be ANALYZER_WHITESPACE,
0055: * ANALYZER_STANDARD, ANALYZER_SIMPLE, ANALYZER_STOP, or a fully
0056: * qualified class of the analyzer.
0057: *
0058: * <p>It is part of the anaylzer group, and should be constructed using the
0059: * {@link org.compass.core.config.CompassSettings#setGroupSettings(String,String,String[],String[])},
0060: * with the {@link #PREFIX} as the prefix, the analyzer group
0061: * name, and the type as one of the values.
0062: */
0063: public static final String TYPE = "type";
0064:
0065: /**
0066: * The fully qualified name of the anayzer factory. Must implement the
0067: * {@link org.compass.core.lucene.engine.analyzer.LuceneAnalyzerFactory}
0068: * inteface.
0069: *
0070: * <p>It is part of the anaylzer group, and should be constructed using the
0071: * {@link org.compass.core.config.CompassSettings#setGroupSettings(String,String,String[],String[])},
0072: * with the {@link #PREFIX} as the prefix, the analyzer group
0073: * name, and the type as one of the values.
0074: */
0075: public static final String FACTORY = "factory";
0076:
0077: /**
0078: * A comma separated list of stop words to use with the chosen analyzer.
0079: * If the string starts with <code>+</code>, the list of stop-words
0080: * will be added to the default set of stop words defined for the
0081: * analyzer. Only supported for the default analyzers that comes with
0082: * Compass.
0083: *
0084: * <p>It is part of the anaylzer group, and should be
0085: * constructed using the
0086: * {@link org.compass.core.config.CompassSettings#setGroupSettings(String,String,String[],String[])},
0087: * with the {@link #PREFIX} as the prefix, the analyzer group
0088: * name, and the stopwords as one of the values.
0089: */
0090: public static final String STOPWORDS = "stopwords";
0091:
0092: /**
0093: * A comma separated list of filter names to be applied to the analyzer. The names
0094: * match {@link org.compass.core.lucene.engine.analyzer.LuceneAnalyzerTokenFilterProvider}s
0095: * configured using the {@link AnalyzerFilter} configuration settings.
0096: */
0097: public static final String FILTERS = "filters";
0098:
0099: public abstract class CoreTypes {
0100:
0101: /**
0102: * An analyzer which tokenizes a text according to whitespaces.
0103: *
0104: * @see org.apache.lucene.analysis.WhitespaceAnalyzer
0105: */
0106: public static final String WHITESPACE = "whitespace";
0107:
0108: /**
0109: * The standard lucene analyzer.
0110: *
0111: * @see org.apache.lucene.analysis.standard.StandardAnalyzer
0112: */
0113: public static final String STANDARD = "standard";
0114:
0115: /**
0116: * Simple Lucene Analyzer.
0117: *
0118: * @see org.apache.lucene.analysis.SimpleAnalyzer
0119: */
0120: public static final String SIMPLE = "simple";
0121:
0122: /**
0123: * Lucene Stop analyzer.
0124: *
0125: * @see org.apache.lucene.analysis.StopAnalyzer
0126: */
0127: public static final String STOP = "stop";
0128:
0129: /**
0130: * Lucene Keyword analyzer.
0131: *
0132: * @see org.apache.lucene.analysis.KeywordAnalyzer
0133: */
0134: public static final String KEYWORD = "keyword";
0135: }
0136:
0137: public abstract class Snowball {
0138:
0139: /**
0140: *
0141: */
0142: public static final String SNOWBALL = "snowball";
0143:
0144: /**
0145: *
0146: */
0147: public static final String NAME_TYPE = "name";
0148:
0149: /**
0150: *
0151: */
0152: public static final String NAME_DANISH = "Danish";
0153:
0154: /**
0155: *
0156: */
0157: public static final String NAME_DUTCH = "Dutch";
0158:
0159: /**
0160: *
0161: */
0162: public static final String NAME_ENGLISH = "English";
0163:
0164: /**
0165: *
0166: */
0167: public static final String NAME_FINNISH = "Finnish";
0168:
0169: /**
0170: *
0171: */
0172: public static final String NAME_FRENCH = "French";
0173:
0174: /**
0175: *
0176: */
0177: public static final String NAME_GERMAN = "German";
0178:
0179: /**
0180: *
0181: */
0182: public static final String NAME_GERMAN2 = "German2";
0183:
0184: /**
0185: *
0186: */
0187: public static final String NAME_ITALIAN = "Italian";
0188:
0189: /**
0190: *
0191: */
0192: public static final String NAME_KP = "Kp";
0193:
0194: /**
0195: *
0196: */
0197: public static final String NAME_LOVINS = "Lovins";
0198:
0199: /**
0200: *
0201: */
0202: public static final String NAME_NORWEGIAN = "Norwegian";
0203:
0204: /**
0205: *
0206: */
0207: public static final String NAME_PORTER = "Porter";
0208:
0209: /**
0210: *
0211: */
0212: public static final String NAME_PORTUGUESE = "Portuguese";
0213:
0214: /**
0215: *
0216: */
0217: public static final String NAME_RUSSIAN = "Russian";
0218:
0219: /**
0220: *
0221: */
0222: public static final String NAME_SPANISH = "Spanish";
0223:
0224: /**
0225: *
0226: */
0227: public static final String NAME_SWEDISH = "Swedish";
0228: }
0229:
0230: public abstract class ExtendedTypes {
0231:
0232: /**
0233: *
0234: */
0235: public static final String BRAZILIAN = "brazilian";
0236:
0237: /**
0238: *
0239: */
0240: public static final String CJK = "cjk";
0241:
0242: /**
0243: *
0244: */
0245: public static final String CHINESE = "chinese";
0246:
0247: /**
0248: *
0249: */
0250: public static final String CZECH = "czech";
0251:
0252: /**
0253: *
0254: */
0255: public static final String GERMAN = "german";
0256:
0257: /**
0258: *
0259: */
0260: public static final String GREEK = "greek";
0261:
0262: /**
0263: *
0264: */
0265: public static final String FRENCH = "french";
0266:
0267: /**
0268: *
0269: */
0270: public static final String DUTCH = "dutch";
0271:
0272: /**
0273: *
0274: */
0275: public static final String RUSSIAN = "russian";
0276: }
0277: }
0278:
0279: public static abstract class AnalyzerFilter {
0280:
0281: /**
0282: * The prefix used for analyzer filter groups.
0283: */
0284: public static final String PREFIX = "compass.engine.analyzerfilter";
0285:
0286: /**
0287: * The fully qualified class name of the
0288: * {@link org.compass.core.lucene.engine.analyzer.LuceneAnalyzerTokenFilterProvider} implementation.
0289: */
0290: public static final String TYPE = "type";
0291:
0292: /**
0293: * The synonym type, used to set the {@link #TYPE} to
0294: * {@link org.compass.core.lucene.engine.analyzer.synonym.SynonymAnalyzerTokenFilterProvider}.
0295: */
0296: public static final String SYNONYM_TYPE = "synonym";
0297:
0298: public static abstract class Synonym {
0299:
0300: /**
0301: * The fully qualified class of the synonym lookup provider
0302: * ({@link org.compass.core.lucene.engine.analyzer.synonym.SynonymLookupProvider} implementation.
0303: */
0304: public static final String LOOKUP = "lookup";
0305: }
0306: }
0307:
0308: /**
0309: * Settings for Lucene highlighter.
0310: *
0311: * @author kimchy
0312: */
0313: public static abstract class Highlighter {
0314: /**
0315: * The prefix used for highlighter groups.
0316: */
0317: public static final String PREFIX = "compass.engine.highlighter";
0318:
0319: /**
0320: * The default highlighter group that must be set.
0321: */
0322: public static final String DEFAULT_GROUP = "default";
0323:
0324: /**
0325: * The text tokenizer type that will be used.
0326: */
0327: public static final String TEXT_TOKENIZER = "textTokenizer";
0328:
0329: /**
0330: * Low level. A boolean setting (<code>true</code>, or
0331: * <code>false</code>). If the query will be rewritten befored it is
0332: * used by the highlighter.
0333: */
0334: public static final String REWRITE_QUERY = "rewriteQuery";
0335:
0336: /**
0337: * Low level. A boolean setting (<code>true</code> or
0338: * <code>false</code>). If the idf value will be used during the
0339: * highlighting process. Used by formatters that a) score selected
0340: * fragments better b) use graded highlights eg chaning intensity of
0341: * font color. Automatically assigned for the provided formatters.
0342: */
0343: public static final String COMPUTE_IDF = "computeIdf";
0344:
0345: /**
0346: * Sets the maximum number of fragments that will be returned. Defaults
0347: * to <code>3</code>.
0348: */
0349: public static final String MAX_NUM_FRAGMENTS = "maxNumFragments";
0350:
0351: /**
0352: * Sets the separator string between fragments if using the combined
0353: * fragments highlight option. Defaults to <code>...</code>.
0354: */
0355: public static final String SEPARATOR = "separator";
0356:
0357: /**
0358: * Maximum bytes to analyze. Default to <code>50*1024</code> bytes.
0359: */
0360: public static final String MAX_BYTES_TO_ANALYZE = "maxBytesToAnalyze";
0361:
0362: /**
0363: * The fully qualified name of the highlighter factory. Must implement
0364: * the
0365: * {@link org.compass.core.lucene.engine.highlighter.LuceneHighlighterFactory}
0366: * inteface. <p/> It is part of the highlighter group, and should be
0367: * constructed using the
0368: * {@link org.compass.core.config.CompassSettings#setGroupSettings(String,String,String[],String[])},
0369: * with the {@link #PREFIX} as the prefix, the highlighter
0370: * group name, and the factory as one of the values.
0371: */
0372: public static final String FACTORY = "factory";
0373:
0374: /**
0375: * Settings for Lucene highlighter fragmenter.
0376: *
0377: * @author kimchy
0378: */
0379: public abstract class Fragmenter {
0380:
0381: /**
0382: * If set, sets the class name of the Lucene <code>Fragmenter</code>, or the actual type.
0383: */
0384: public static final String TYPE = "fragmenter.type";
0385:
0386: /**
0387: * A simple Lucene <code>Fragmenter</code>. Breaks text up into same-size fragments with no
0388: * concerns over spotting sentence boundaries.
0389: */
0390: public static final String TYPE_SIMPLE = "simple";
0391:
0392: /**
0393: * A null Lucene <code>Fragmenter</code>. Does not fragment the text.
0394: */
0395: public static final String TYPE_NULL = "null";
0396:
0397: /**
0398: * If not setting the {@link #TYPE} (and thus
0399: * using Lucene <code>SimpleFragmenter</code>), sets the size of
0400: * the fragment. Defaults to <code>100</code>.
0401: */
0402: public static final String SIMPLE_SIZE = "fragmenter.simple.size";
0403:
0404: }
0405:
0406: public abstract class Encoder {
0407:
0408: /**
0409: * If set, sets the type of the Lucene <code>Encoder</code>, or
0410: * it's fully qualifed name.
0411: */
0412: public static final String TYPE = "encoder.type";
0413:
0414: /**
0415: * Performs no encoding of the text.
0416: */
0417: public static final String DEFAULT = "default";
0418:
0419: /**
0420: * Simple encoder that encodes html tags.
0421: */
0422: public static final String HTML = "html";
0423: }
0424:
0425: /**
0426: * Settings for Lucene highlighter formatter.
0427: *
0428: * @author kimchy
0429: */
0430: public abstract class Formatter {
0431:
0432: /**
0433: * If set, sets the type of the Lucene <code>Formatter</code> or
0434: * it's fully qualified name. Defaults to {@link #SIMPLE}.
0435: */
0436: public static final String TYPE = "formatter.type";
0437:
0438: /**
0439: * A simple wrapper formatter. Wraps the highlight with pre and post
0440: * string (can be html or xml tags). They can be set using
0441: * {@link #SIMPLE_PRE_HIGHLIGHT} and
0442: * {@link #SIMPLE_POST_HIGHLIGHT}.
0443: */
0444: public static final String SIMPLE = "simple";
0445:
0446: /**
0447: * In case the highlighter uses the {@link #SIMPLE},
0448: * controlls the text that is appened before the highlighted text.
0449: * Defatuls to <code><b></code>.
0450: */
0451: public static final String SIMPLE_PRE_HIGHLIGHT = "formatter.simple.pre";
0452:
0453: /**
0454: * In case the highlighter uses the {@link #SIMPLE},
0455: * controlls the text that is appened after the highlighted text.
0456: * Defatuls to <code></b></code>.
0457: */
0458: public static final String SIMPLE_POST_HIGHLIGHT = "formatter.simple.post";
0459:
0460: /**
0461: * Wraps an html span tag around the highlighted text. The
0462: * background and foreground colors can be controlled and will have
0463: * different color intensity depending on the score.
0464: */
0465: public static final String HTML_SPAN_GRADIENT = "htmlSpanGradient";
0466:
0467: /**
0468: * The score (and above) displayed as maxColor.
0469: */
0470: public static final String HTML_SPAN_GRADIENT_MAX_SCORE = "formatter.htmlSpanGradient.maxScore";
0471:
0472: /**
0473: * The hex color used for representing IDF scores of zero eg #FFFFFF
0474: * (white) or null if no foreground color required.
0475: */
0476: public static final String HTML_SPAN_GRADIENT_MIN_FOREGROUND_COLOR = "formatter.htmlSpanGradient.minForegroundColor";
0477:
0478: /**
0479: * The largest hex color used for representing IDF scores eg #000000
0480: * (black) or null if no foreground color required.
0481: */
0482: public static final String HTML_SPAN_GRADIENT_MAX_FOREGROUND_COLOR = "formatter.htmlSpanGradient.maxForegroundColor";
0483:
0484: /**
0485: * The hex color used for representing IDF scores of zero eg #FFFFFF
0486: * (white) or null if no background color required.
0487: */
0488: public static final String HTML_SPAN_GRADIENT_MIN_BACKGROUND_COLOR = "formatter.htmlSpanGradient.minBackgroundColor";
0489:
0490: /**
0491: * The largest hex color used for representing IDF scores eg #000000
0492: * (black) or null if no background color required.
0493: */
0494: public static final String HTML_SPAN_GRADIENT_MAX_BACKGROUND_COLOR = "formatter.htmlSpanGradient.maxBackgroundColor";
0495: }
0496: }
0497:
0498: /* Transaction Locking Settings */
0499:
0500: public static abstract class Transaction {
0501:
0502: /**
0503: * The amount of time a transaction will wait in order to obtain it's
0504: * specific lock (in seconds). Defaults to 10 seconds.
0505: */
0506: public static final String LOCK_TIMEOUT = "compass.transaction.lockTimeout";
0507:
0508: /**
0509: * The interval that the transaction will check to see if it can obtain
0510: * the lock (in milliseconds). <p/> The default value is 100
0511: * milliseconds
0512: */
0513: public static final String LOCK_POLL_INTERVAL = "compass.transaction.lockPollInterval";
0514:
0515: /**
0516: * Should the cache be cleared on commit. Note, that setting it to <code>false</code>
0517: * might mean that the transaction isolation level will not function properly (for example,
0518: * with read_committed, it will mean that data that is committed will take time to be
0519: * reflected in the index). Defaults to <code>true</code>.
0520: */
0521: public static final String CLEAR_CACHE_ON_COMMIT = "compass.transaction.clearCacheOnCommit";
0522:
0523: /**
0524: * Transaction log settings
0525: */
0526: public static final class ReadCommittedTransLog {
0527:
0528: /**
0529: * The connection type for the read committed transactional log. Can be either <code>ram://</code>
0530: * or <code>file://</code>.
0531: */
0532: public static final String CONNECTION = "compass.transaction.readcommitted.translog.connection";
0533:
0534: /**
0535: * Should the transactional index be optimized before it is added to the actual index. Defaults to
0536: * <code>true</code>.
0537: */
0538: public static final String OPTIMIZE_TRANS_LOG = "compass.transaction.readcommitted.translog.optimize";
0539: }
0540: }
0541:
0542: /* Optimizer Settings */
0543:
0544: public static abstract class Optimizer {
0545:
0546: /**
0547: * The fully qualified class name of the optimizer.
0548: */
0549: public static final String TYPE = "compass.engine.optimizer.type";
0550:
0551: /**
0552: * If the optimizer should be scheduled (can be "true" or "false"). <p/>
0553: * Defaults to <code>true</code>
0554: */
0555: public static final String SCHEDULE = "compass.engine.optimizer.schedule";
0556:
0557: /**
0558: * Determines the how often the optimizer will kick in (in seconds).
0559: * <p/> Default is 10 seconds. Can be float number.
0560: */
0561: public static final String SCHEDULE_PERIOD = "compass.engine.optimizer.schedule.period";
0562:
0563: public abstract class Aggressive {
0564:
0565: /**
0566: * Determines how often the aggressive optimizer will optimize the
0567: * index. <p/> Defaults to 10.
0568: */
0569: public static final String MERGE_FACTOR = "compass.engine.optimizer.aggressive.mergeFactor";
0570:
0571: }
0572:
0573: /**
0574: * Set of environment settings for the adaptive optimizer.
0575: *
0576: * @author kimchy
0577: */
0578: public abstract class Adaptive {
0579:
0580: /**
0581: * Determines how often the adaptive optimizer will optimize the
0582: * index. <p/> Defaults to 10.
0583: */
0584: public static final String MERGE_FACTOR = "compass.engine.optimizer.adaptive.mergeFactor";
0585: }
0586:
0587: }
0588:
0589: public static abstract class SpellCheck {
0590:
0591: public static final String PREFIX = "compass.engine.spellcheck.";
0592:
0593: /**
0594: * Should the spell check module be enabled or not. Defaults to <code>false</code>.
0595: */
0596: public static final String ENABLE = PREFIX + "enable";
0597:
0598: /**
0599: * A globabl set of comma separated properties that will be included for each sub index.
0600: */
0601: public static final String GLOBAL_INCLUDE_PROPERTIES = PREFIX
0602: + "globablIncludeProperties";
0603:
0604: /**
0605: * A globabl set of comma separated properties that will be exluded for each sub index.
0606: */
0607: public static final String GLOBAL_EXCLUDE_PROPERTY = PREFIX
0608: + "globalExcludeProperties";
0609:
0610: /**
0611: * The default property for the spell check.
0612: */
0613: public static final String DEFAULT_PROPERTY = PREFIX
0614: + "defaultProperty";
0615:
0616: /**
0617: * The default mode for inclduing/excluding of proeprties from the spell check index. Only applies on resource
0618: * mappings (class/resource/xml-object) that have their spell-check default value (which is NA).
0619: *
0620: * <p>If not set, will use just the all proeprty for mappings it can apply to. If set to <code>INCLUDE</code>
0621: * will include by default all the given proeprties unless there are specific ones that have <code>EXLCUDE</code>
0622: * mappings. If set to <code>EXCLUDE</codE> will exclude all proeprties by default unless the given proeprties
0623: * are marked with <code>INCLUDE</code>.
0624: */
0625: public static final String DEFAULT_MODE = PREFIX
0626: + "defaultMode";
0627:
0628: /**
0629: * The default accuracy that will be used. Defaults to <code>0.5</code>.
0630: */
0631: public static final String ACCURACY = PREFIX + "accuracy";
0632:
0633: /**
0634: * Sets the dictionary threshold, which controls the minimum
0635: * number of documents (of the total) where a term should appear. Defaults to <code>0.0f</code>.
0636: */
0637: public static final String DICTIONARY_THRESHOLD = PREFIX
0638: + "dictionaryThreshold";
0639:
0640: /**
0641: * Set to <code>true</code> in order to have a scheduled task that rebuilds the spell index
0642: * if needed.
0643: */
0644: public static final String SCHEDULE = PREFIX + "schedule";
0645:
0646: /**
0647: * The initial delay of the scheduled rebuild. In seconds.
0648: */
0649: public static final String SCHEDULE_INITIAL_DELAY = PREFIX
0650: + "scheduleInitialDelay";
0651:
0652: /**
0653: * Set <b>in seconds</b> the interval at which a check and a possible rebuild of the spell check
0654: * index will occur. Defaults to <b>10</b> minutes.
0655: */
0656: public static final String SCHEDULE_INTERVAL = PREFIX
0657: + "scheduleInterval";
0658: }
0659:
0660: /**
0661: * Specific environment settings for the <code>batch_insert</code> settings.
0662: */
0663: public static abstract class SearchEngineIndex {
0664:
0665: /**
0666: * <p>Determines the largest segment (measured by
0667: * document count) that may be merged with other segments.
0668: * Small values (e.g., less than 10,000) are best for
0669: * interactive indexing, as this limits the length of
0670: * pauses while indexing to a few seconds. Larger values
0671: * are best for batched indexing and speedier
0672: * searches.</p>
0673: *
0674: * <p>The default value is {@link Integer#MAX_VALUE}.</p>
0675: */
0676: public static final String MAX_MERGE_DOCS = "compass.engine.maxMergeDocs";
0677:
0678: /**
0679: * Determines how often segment indices are merged by addDocument(). With
0680: * smaller values, less RAM is used while indexing, and searches on
0681: * unoptimized indices are faster, but indexing speed is slower. With larger
0682: * values, more RAM is used during indexing, and while searches on unoptimized
0683: * indices are slower, indexing is faster. Thus larger values (> 10) are best
0684: * for batch index creation, and smaller values (< 10) for indices that are
0685: * interactively maintained.
0686: *
0687: * <p>Defaults to <code>10</code>.
0688: */
0689: public static final String MERGE_FACTOR = "compass.engine.mergeFactor";
0690:
0691: /**
0692: * Determines the minimal number of documents required
0693: * before the buffered in-memory documents are flushed as
0694: * a new Segment. Large values generally gives faster
0695: * indexing.
0696: *
0697: * <p>When this is set, the writer will flush every
0698: * maxBufferedDocs added documents. Pass in {@link
0699: * org.apache.lucene.index.IndexWriter#DISABLE_AUTO_FLUSH} to prevent triggering a flush due
0700: * to number of buffered documents. Note that if flushing
0701: * by RAM usage is also enabled, then the flush will be
0702: * triggered by whichever comes first.
0703: *
0704: * <p>Disabled by default (writer flushes by RAM usage).
0705: */
0706: public static final String MAX_BUFFERED_DOCS = "compass.engine.maxBufferedDocs";
0707:
0708: /**
0709: * <p>Determines the minimal number of delete terms required before the buffered
0710: * in-memory delete terms are applied and flushed. If there are documents
0711: * buffered in memory at the time, they are merged and a new segment is
0712: * created.</p>
0713: *
0714: * <p>Disabled by default (writer flushes by RAM usage).</p>
0715: */
0716: public static final String MAX_BUFFERED_DELETED_TERMS = "compass.engine.maxBufferedDeletedTerms";
0717:
0718: /**
0719: * Expert: Set the interval between indexed terms. Large values cause less
0720: * memory to be used by IndexReader, but slow random-access to terms. Small
0721: * values cause more memory to be used by an IndexReader, and speed
0722: * random-access to terms.
0723: *
0724: * This parameter determines the amount of computation required per query
0725: * term, regardless of the number of documents that contain that term. In
0726: * particular, it is the maximum number of other terms that must be
0727: * scanned before a term is located and its frequency and position information
0728: * may be processed. In a large index with user-entered query terms, query
0729: * processing time is likely to be dominated not by term lookup but rather
0730: * by the processing of frequency and positional data. In a small index
0731: * or when many uncommon query terms are generated (e.g., by wildcard
0732: * queries) term lookup may become a dominant cost.
0733: *
0734: * In particular, <code>numUniqueTerms/interval</code> terms are read into
0735: * memory by an IndexReader, and, on average, <code>interval/2</code> terms
0736: * must be scanned for each random term access.
0737: *
0738: * @see org.apache.lucene.index.IndexWriter#DEFAULT_TERM_INDEX_INTERVAL
0739: */
0740: public static final String TERM_INDEX_INTERVAL = "compass.engine.termIndexInterval";
0741:
0742: /**
0743: * Determines the amount of RAM that may be used for
0744: * buffering added documents before they are flushed as a
0745: * new Segment. Generally for faster indexing performance
0746: * it's best to flush by RAM usage instead of document
0747: * count and use as large a RAM buffer as you can.
0748: *
0749: * <p>When this is set, the writer will flush whenever
0750: * buffered documents use this much RAM. Pass in {@link
0751: * org.apache.lucene.index.IndexWriter#DISABLE_AUTO_FLUSH} to prevent triggering a flush due
0752: * to RAM usage. Note that if flushing by document count
0753: * is also enabled, then the flush will be triggered by
0754: * whichever comes first.</p>
0755: *
0756: * <p> The default value is {@link org.apache.lucene.index.IndexWriter#DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
0757: */
0758: public static final String RAM_BUFFER_SIZE = "compass.engine.ramBufferSize";
0759:
0760: /**
0761: * Setting to turn on usage of a compound file. When on, multiple files for
0762: * each segment are merged into a single file once the segment creation is
0763: * finished. This is done regardless of what directory is in use. <p/>
0764: * Default value id <code>true</code>
0765: */
0766: public static final String USE_COMPOUND_FILE = "compass.engine.useCompoundFile";
0767:
0768: /**
0769: * The maximum number of terms that will be indexed for a single field in a
0770: * document. This limits the amount of memory required for indexing, so that
0771: * collections with very large files will not crash the indexing process by
0772: * running out of memory. <p/>Note that this effectively truncates large
0773: * documents, excluding from the index terms that occur further in the
0774: * document. If you know your source documents are large, be sure to set
0775: * this value high enough to accomodate the expected size. If you set it to
0776: * Integer.MAX_VALUE, then the only limit is your memory, but you should
0777: * anticipate an OutOfMemoryError. <p/>By default, no more than 10,000 terms
0778: * will be indexed for a field.
0779: */
0780: public static final String MAX_FIELD_LENGTH = "compass.engine.maxFieldLength";
0781:
0782: /**
0783: * Sets how often (in milliseconds) the index manager will check if the index
0784: * cache needs to be invalidated. Defaults to <code>5000</code>. Setting it to
0785: * <code>0</code> means that the cache will check if it needs to be invalidated all the time. Setting
0786: * it to <code>-1</code> means that the cache will never check if it needs to
0787: * be invalidated, note, that it is perfectly fine if a single instance is
0788: * manipulating the index. It works, since the cache is invalidated when a
0789: * transaction is committed and a dirty operation has occured.
0790: */
0791: public static final String CACHE_INTERVAL_INVALIDATION = "compass.engine.cacheIntervalInvalidation";
0792:
0793: /**
0794: * The index manager schedule interval (in seconds) where different actions related to index manager will happen (such
0795: * as global cache interval checks. If set to <code>-1</code>, not scheduling will happen.
0796: */
0797: public static final String INDEX_MANAGER_SCHEDULE_INTERVAL = "compass.engine.indexManagerScheduleInterval";
0798:
0799: /**
0800: * Defaults to <code>false</code>. If set to <code>true</code>, will cause index manager operation (including
0801: * replace index) to wait for all other Compass instances to invalidate their cache. The wait time will be
0802: * the same as the {@link #INDEX_MANAGER_SCHEDULE_INTERVAL}.
0803: */
0804: public static final String WAIT_FOR_CACHE_INVALIDATION_ON_INDEX_OPERATION = "compass.engine.waitForCacheInvalidationOnIndexOperation";
0805: }
0806:
0807: /**
0808: * Settings applicable when storing the index within a database.
0809: */
0810: public static abstract class JdbcStore {
0811:
0812: /**
0813: * The dialect (database) that is used when storing the index in the database
0814: */
0815: public static final String DIALECT = "compass.engine.store.jdbc.dialect";
0816:
0817: /**
0818: * Some of the entries in the database are marked as deleted, and not actually gets to be
0819: * deleted from the database. The settings controls the delta time of when they should be deleted.
0820: * They will be deleted if they were marked for deleted "delta" time ago
0821: * (base on database time, if possible by dialect).
0822: */
0823: public static final String DELETE_MARK_DELETED_DELTA = "compass.engine.store.jdbc.deleteMarkDeletedDelta";
0824:
0825: /**
0826: * The class name of the Jdbc lock to be used.
0827: */
0828: public static final String LOCK_TYPE = "compass.engine.store.jdbc.lockType";
0829:
0830: /**
0831: * If the connection is managed or not. Basically, if set to <code>false</code>, compass
0832: * will commit and rollback the transaction. If set to <code>true</code>, compass will
0833: * not perform it. Defaults to <code>false</code>. Should be set to <code>true</code> if
0834: * using external transaction managers (like JTA or Spring <code>PlatformTransactionManager</literal>),
0835: * and <code>false</code> if using <code>LocalTransactionFactory</code>.
0836: */
0837: public static final String MANAGED = "compass.engine.store.jdbc.managed";
0838:
0839: /**
0840: * If set to <code>true</code>, no database schema level operations will be performed (drop and create
0841: * tables). When deleting the data in the index, the content will be deleted, but the table will not
0842: * be dropped. Default to <code>false</code>.
0843: */
0844: public static final String DISABLE_SCHEMA_OPERATIONS = "compass.engine.store.jdbc.disableSchemaOperations";
0845:
0846: public abstract class Connection {
0847: /**
0848: * The jdbc driver class
0849: */
0850: public static final String DRIVER_CLASS = "compass.engine.store.jdbc.connection.driverClass";
0851:
0852: /**
0853: * the jdbc connection user name
0854: */
0855: public static final String USERNAME = "compass.engine.store.jdbc.connection.username";
0856:
0857: /**
0858: * The jdbc connection password
0859: */
0860: public static final String PASSWORD = "compass.engine.store.jdbc.connection.password";
0861:
0862: /**
0863: * Sets the auto commit for the <code>Connection</code> created by the <code>DataSource</code>.
0864: * Defaults to <code>false</code>. Can be either <code>false</code>, <code>true</code> or
0865: * <code>external</code> (let outer configuration management to set it).
0866: */
0867: public static final String AUTO_COMMIT = "compass.engine.store.jdbc.connection.autoCommit";
0868: }
0869:
/**
 * Setting names used to select and configure the data source provider.
 */
public abstract class DataSourceProvider {

    /**
     * Setting name of the data source provider class, the component responsible
     * for creating data sources.
     */
    public static final String CLASS = "compass.engine.store.jdbc.connection.provider.class";

    /**
     * Connection pool setting names; every key lives under the
     * {@code compass.engine.store.jdbc.connection.provider.dbcp.} prefix.
     */
    public abstract class Dbcp {

        private static final String PREFIX = "compass.engine.store.jdbc.connection.provider.dbcp.";

        /** Default TransactionIsolation state of the connections created by the pool. */
        public static final String DEFAULT_TRANSACTION_ISOLATION = PREFIX + "defaultTransactionIsolation";

        /** Number of connections created up front when the pool is started. */
        public static final String INITIAL_SIZE = PREFIX + "initialSize";

        /**
         * Upper bound on the active connections that may be allocated from the pool
         * at the same time; zero means no limit.
         */
        public static final String MAX_ACTIVE = PREFIX + "maxActive";

        /**
         * Upper bound on the connections that may remain idle in the pool without
         * extra ones being released; zero means no limit.
         */
        public static final String MAX_IDLE = PREFIX + "maxIdle";

        /**
         * Lower bound on the connections that may remain idle in the pool without
         * extra ones being created; 0 creates none.
         */
        public static final String MIN_IDLE = PREFIX + "minIdle";

        /**
         * Maximum time, in milliseconds, the pool waits for a connection to be returned
         * when none is available before throwing an exception; -1 waits indefinitely.
         */
        public static final String MAX_WAIT = PREFIX + "maxWait";

        /**
         * Upper bound on the open statements that may be allocated from the statement
         * pool at the same time; zero means no limit.
         */
        public static final String MAX_OPEN_PREPARED_STATEMENTS = PREFIX + "maxOpenPreparedStatements";

        /** Whether the pool caches prepared statements. */
        public static final String POOL_PREPARED_STATEMENTS = PREFIX + "poolPreparedStatements";
    }
}
0936:
/**
 * Setting names describing the columns of the table used by the JDBC based store.
 */
public abstract class DDL {

    // Common leading part of every key in this holder.
    private static final String PREFIX = "compass.engine.store.jdbc.ddl.";

    /** Name of the name column. Defaults to {@code name_}. */
    public static final String NAME_NAME = PREFIX + "name.name";

    /** Length of the name column. Defaults to 50. */
    public static final String NAME_LENGTH = PREFIX + "name.length";

    /** Name of the value column. Defaults to {@code value_}. */
    public static final String VALUE_NAME = PREFIX + "value.name";

    /**
     * Length (in K) of the value column, for databases that require it.
     * Defaults to 500 * 1024 K.
     */
    public static final String VALUE_LENGTH = PREFIX + "value.length";

    /** Name of the size column. Defaults to {@code size_}. */
    public static final String SIZE_NAME = PREFIX + "size.name";

    /** Name of the last modified column. Defaults to {@code lf_}. */
    public static final String LAST_MODIFIED_NAME = PREFIX + "lastModified.name";

    /** Name of the deleted column. Defaults to {@code deleted_}. */
    public static final String DELETED_NAME = PREFIX + "deleted.name";
}
0974:
0975: public abstract class FileEntry {
0976:
0977: public static final String PREFIX = "compass.engine.store.jdbc.fe";
0978:
0979: /**
0980: * The buffer size for implemenations of Lucene <code>IndexInput</code> where applicable.
0981: */
0982: public static final String INDEX_INPUT_BUFFER_SIZE = JdbcBufferedIndexInput.BUFFER_SIZE_SETTING;
0983:
0984: /**
0985: * The buffer size for implemenations of Lucene <code>IndexOutput</code> where applicable.
0986: */
0987: public static final String INDEX_OUTPUT_BUFFER_SIZE = JdbcBufferedIndexOutput.BUFFER_SIZE_SETTING;
0988:
0989: /**
0990: * The fully qualifed class of the <code>IndexInput</code> implementation.
0991: */
0992: public static final String INDEX_INPUT_TYPE = JdbcFileEntrySettings.INDEX_INPUT_TYPE_SETTING;
0993:
0994: /**
0995: * The fully qualifed class of the <code>IndexOutput</code> implementation.
0996: */
0997: public static final String INDEX_OUTPUT_TYPE = JdbcFileEntrySettings.INDEX_OUTPUT_TYPE_SETTING;
0998:
0999: /**
1000: * The fully qualifed class of the <code>FileEntryHandler</code> implementation.
1001: */
1002: public static final String FILE_ENTRY_HANDLER_TYPE = JdbcFileEntrySettings.FILE_ENTRY_HANDLER_TYPE;
1003:
1004: /**
1005: * The threshold value to be used for <code>RAMAndFileJdbcIndexOutput<code>.
1006: */
1007: public static final String INDEX_OUTPUT_THRESHOLD = RAMAndFileJdbcIndexOutput.INDEX_OUTPUT_THRESHOLD_SETTING;
1008: }
1009: }
1010:
1011: public static abstract class DirectoryWrapper {
1012:
1013: public static final String PREFIX = "compass.engine.store.wrapper";
1014:
1015: public static final String TYPE = "type";
1016: }
1017:
1018: /**
1019: * Lucene {@link org.apache.lucene.store.LockFactory} creation settings.
1020: */
1021: public static abstract class LockFactory {
1022:
1023: /**
1024: * The settings prefix for LockFactory
1025: */
1026: public static final String PREFIX = "compass.engine.store.lockFactory";
1027:
1028: /**
1029: * The type of the lock factory. Can either hold values stated at {@link Type} or
1030: * the fully qualified class name of the {@link org.apache.lucene.store.LockFactory}
1031: * implementation.
1032: */
1033: public static final String TYPE = PREFIX + ".type";
1034:
1035: /**
1036: * Certain implementation (such as {@link Type#SIMPLE_FS} or {@link Type#NATIVE_FS})
1037: * also accept an optional path where to store the index locking.
1038: */
1039: public static final String PATH = PREFIX + ".path";
1040:
1041: public static abstract class Type {
1042:
1043: /**
1044: * No locking is perfomed, generally should not be used. Maps to Lucene
1045: * {@link org.apache.lucene.store.NoLockFactory}.
1046: */
1047: public static final String NO_LOCKING = "nolock";
1048:
1049: /**
1050: * The default lock factory uses simple FS operations to write a lock file.
1051: * Maps to Lucene {@link org.apache.lucene.store.SimpleFSLockFactory}.
1052: */
1053: public static final String SIMPLE_FS = "simplefs";
1054:
1055: /**
1056: * A native FS lock factory (uses NIO). Maps to Lucene
1057: * {@link org.apache.lucene.store.NativeFSLockFactory}.
1058: */
1059: public static final String NATIVE_FS = "nativefs";
1060:
1061: /**
1062: * A single instance lock fatory (uses memory based ones). Maps to
1063: * Lucene {@link org.apache.lucene.store.SingleInstanceLockFactory}.
1064: */
1065: public static final String SINGLE_INSTANCE = "singleinstance";
1066: }
1067: }
1068:
1069: /**
1070: * Controls Lucene {@link org.compass.core.lucene.LuceneEnvironment.MergeScheduler} configuration.
1071: */
1072: public static abstract class MergeScheduler {
1073:
1074: /**
1075: * The prefix setting for merge scheduler.
1076: */
1077: public static final String PREFIX = "compass.engine.merge.scheduler";
1078:
1079: /**
1080: * The type of the {@link org.compass.core.lucene.engine.merge.scheduler.MergeSchedulerProvider} that
1081: * will be created. Can be one of the constant names of specific types (inner classes) or the
1082: * FQN of a merge scheduler provider.
1083: */
1084: public static final String TYPE = PREFIX + ".type";
1085:
1086: /**
1087: * Allows to cofnigure {@link org.apache.lucene.index.SerialMergeScheduler}.
1088: */
1089: public abstract class Serial {
1090:
1091: /**
1092: * The name of the serial merge scheduler to be used as the merge scheduler type.
1093: */
1094: public static final String NAME = "serial";
1095: }
1096:
1097: /**
1098: * Allows to configure {@link org.apache.lucene.index.ConcurrentMergeScheduler}.
1099: */
1100: public abstract class Concurrent {
1101:
1102: /**
1103: * The name of the concurrent merge scheduler to be used as the merge scheduler type.
1104: */
1105: public static final String NAME = "concurrent";
1106:
1107: /**
1108: * The maximum thread count that can be created for merges.
1109: */
1110: public static final String MAX_THREAD_COUNT = "maxThreadCount";
1111:
1112: /**
1113: * The thread priority of merge threads.
1114: */
1115: public static final String THREAD_PRIORITY = "threadPriority";
1116: }
1117:
1118: /**
1119: * Allows to configure Compass {@link org.apache.lucene.index.ExecutorMergeScheduler}.
1120: */
1121: public abstract class Executor {
1122:
1123: /**
1124: * The name of the executor merge scheduler to be used as the merge scheduler type.
1125: */
1126: public static final String NAME = "executor";
1127:
1128: /**
1129: * The maximum concurrent merges that are allowed to be executed.
1130: */
1131: public static final String MAX_CONCURRENT_MERGE = "maxConcurrentMerge";
1132:
1133: }
1134: }
1135:
1136: /**
1137: * Controls Lucene {@link org.apache.lucene.index.MergePolicy} configuration.
1138: */
1139: public static abstract class MergePolicy {
1140:
1141: /**
1142: * The prefix setting for merge policy.
1143: */
1144: public static final String PREFIX = "compass.engine.merge.policy";
1145:
1146: /**
1147: * The type of the {@link org.compass.core.lucene.engine.merge.policy.MergePolicyProvider} that
1148: * will be created. Can be one of the constant names of specific types (inner classes) or the
1149: * FQN of a merge policy provider.
1150: */
1151: public static final String TYPE = PREFIX + ".type";
1152:
1153: /**
1154: * Allows to cofnigure {@link org.apache.lucene.index.LogByteSizeMergePolicy}.
1155: */
1156: public abstract class LogByteSize {
1157:
1158: /**
1159: * The name of the merge policy to be used with the merge policy type.
1160: */
1161: public static final String NAME = "logbytesize";
1162:
1163: /**
1164: * @see {@link org.apache.lucene.index.LogByteSizeMergePolicy#setMaxMergeMB(double)}.
1165: */
1166: public static final String MAX_MERGE_MB = PREFIX
1167: + ".maxMergeMB";
1168:
1169: /**
1170: * @see {@link org.apache.lucene.index.LogByteSizeMergePolicy#setMinMergeMB(double)}.
1171: */
1172: public static final String MIN_MERGE_MB = PREFIX
1173: + ".minMergeMB";
1174: }
1175:
1176: /**
1177: * Allows to configure {@link org.apache.lucene.index.LogDocMergePolicy}.
1178: */
1179: public abstract class LogDoc {
1180:
1181: /**
1182: * The name of the merge policy to be used with the merge policy type.
1183: */
1184: public static final String NAME = "logdoc";
1185:
1186: /**
1187: * @see {@link org.apache.lucene.index.LogDocMergePolicy#setMaxMergeDocs(int)}.
1188: */
1189: public static final String MAX_MERGE_DOCS = PREFIX
1190: + ".maxMergeDocs";
1191:
1192: /**
1193: * @see {@link org.apache.lucene.index.LogDocMergePolicy#setMinMergeDocs(int)}.
1194: */
1195: public static final String MIN_MERGE_DOCS = PREFIX
1196: + ".minMergeDocs";
1197: }
1198: }
1199:
1200: /**
1201: * Settings used to control Lucene {@link org.apache.lucene.index.IndexDeletionPolicy}
1202: * creation.
1203: */
1204: public static abstract class IndexDeletionPolicy {
1205:
1206: public static final String PREFIX = "compass.engine.store.indexDeletionPolicy";
1207:
1208: /**
1209: * The type of the index deleteion policy. Can eb one of the logical names that
1210: * comes built in with Compass, such as {@link org.compass.core.lucene.LuceneEnvironment.IndexDeletionPolicy.KeepLastCommit#NAME},
1211: * or the fully qualified class name of the actual implementation. In suce a case, the implementation
1212: * can also implement {@link org.compass.core.config.CompassConfigurable} and/or
1213: * {@link org.compass.core.lucene.engine.indexdeletionpolicy.DirectoryConfigurable} in order
1214: * to be further configured.
1215: */
1216: public static final String TYPE = PREFIX + ".type";
1217:
1218: /**
1219: * An index deletion policy that keeps only the last commit. Maps to
1220: * Lucene {@link org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy}.
1221: */
1222: public static abstract class KeepLastCommit {
1223:
1224: /**
1225: * The name to put under the
1226: */
1227: public static final String NAME = "keeplastcommit";
1228: }
1229:
1230: /**
1231: * An index deletion policy that keeps on the last N number of commits.
1232: * Maps to {@link org.compass.core.lucene.engine.indexdeletionpolicy.KeepLastNDeletionPolicy}.
1233: */
1234: public static abstract class KeepLastN {
1235:
1236: public static final String NAME = "keeplastn";
1237:
1238: public static final String NUM_TO_KEEP = PREFIX
1239: + ".numToKeep";
1240: }
1241:
1242: public static abstract class ExpirationTime {
1243:
1244: public static final String NAME = "expirationtime";
1245:
1246: public static final String EXPIRATION_TIME_IN_SECONDS = PREFIX
1247: + ".expirationTimeInSeconds";
1248: }
1249:
1250: public static abstract class KeepAll {
1251:
1252: public static final String NAME = "keepall";
1253: }
1254:
1255: public static abstract class KeepNoneOnInit {
1256:
1257: public static final String NAME = "keepnoneoninit";
1258: }
1259: }
1260:
1261: /**
1262: * Settings for different query parser implementations.
1263: */
1264: public static abstract class QueryParser {
1265:
1266: /**
1267: * The prefix used for query parser groups.
1268: */
1269: public static final String PREFIX = "compass.engine.queryParser";
1270:
1271: /**
1272: * The type of the query parser. A fully qualified class name, must
1273: * implement {@link org.compass.core.lucene.engine.queryparser.LuceneQueryParser}.
1274: */
1275: public static final String TYPE = "type";
1276:
1277: /**
1278: * The default query parser group that must be set.
1279: */
1280: public static final String DEFAULT_GROUP = "default";
1281:
1282: /**
1283: * The spell check group.
1284: */
1285: public static final String SPELLCHECK_GROUP = "spellcheck";
1286:
1287: /**
1288: * The default parsers implementation allows to set if leading wildcards
1289: * are allowed or not. Boolen value defaults to <code>true</code>.
1290: */
1291: public static final String DEFAULT_PARSER_ALLOW_LEADING_WILDCARD = "allowLeadingWildcard";
1292:
1293: /**
1294: * The default parsers implementation allows to use contanst score prefix query. Constnat score
1295: * prefix query allows for faster prefix queries but lack in highlighting support.
1296: * Boolen value. Defaults to <code>true</code>.
1297: */
1298: public static final String DEFAULT_PARSER_ALLOW_CONSTANT_SCORE_PREFIX_QUERY = "allowConstantScorePrefixQuery";
1299:
1300: /**
1301: * The default operator when parsing query strings. Defaults to <code>AND</code>. Can be either
1302: * <code>AND</code> or <code>OR</code>.
1303: */
1304: public static final String DEFAULT_PARSER_DEFAULT_OPERATOR = "defaultOperator";
1305: }
1306:
1307: public static abstract class LocalCache {
1308:
1309: public static final String DISABLE_LOCAL_CACHE = "compass.engine.disableLocalCache";
1310:
1311: public static final String PREFIX = "compass.engine.localCache";
1312:
1313: public static final String CONNECTION = "connection";
1314:
1315: public static final String DEFAULT_NAME = "__default__";
1316: }
1317: }
|