001: package org.apache.lucene.search.highlight;
002:
003: /**
004: * Copyright 2002-2004 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018: import org.apache.lucene.analysis.Token;
019:
020: /**
021: * One, or several overlapping tokens, along with the score(s) and the scope of
022: * the original text
023: *
024: * @author MAHarwood
025: */
026: public class TokenGroup {
027:
028: private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
029:
030: Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
031:
032: float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
033:
034: int numTokens = 0;
035:
036: int startOffset = 0;
037:
038: int endOffset = 0;
039:
040: void addToken(Token token, float score) {
041: if (numTokens < MAX_NUM_TOKENS_PER_GROUP) {
042: if (numTokens == 0) {
043: startOffset = token.startOffset();
044: endOffset = token.endOffset();
045: } else {
046: startOffset = Math
047: .min(startOffset, token.startOffset());
048: endOffset = Math.max(endOffset, token.endOffset());
049: }
050: tokens[numTokens] = token;
051: scores[numTokens] = score;
052: numTokens++;
053: }
054: }
055:
056: boolean isDistinct(Token token) {
057: return token.startOffset() >= endOffset;
058: }
059:
060: void clear() {
061: numTokens = 0;
062: }
063:
064: /**
065: * @param index
066: * a value between 0 and numTokens -1
067: * @return the "n"th token
068: */
069: public Token getToken(int index) {
070: return tokens[index];
071: }
072:
073: /**
074: * @param index
075: * a value between 0 and numTokens -1
076: * @return the "n"th score
077: */
078: public float getScore(int index) {
079: return scores[index];
080: }
081:
082: /**
083: * @return the end position in the original text
084: */
085: public int getEndOffset() {
086: return endOffset;
087: }
088:
089: /**
090: * @return the number of tokens in this group
091: */
092: public int getNumTokens() {
093: return numTokens;
094: }
095:
096: /**
097: * @return the start position in the original text
098: */
099: public int getStartOffset() {
100: return startOffset;
101: }
102:
103: /**
104: * @return all tokens' scores summed up
105: */
106: public float getTotalScore() {
107: float total = 0;
108: for (int i = 0; i < numTokens; i++) {
109: total += scores[i];
110: }
111: return total;
112: }
113: }
|