001: package org.apache.lucene.search.highlight;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019: import org.apache.lucene.analysis.Token;
020:
021: /**
022: * One, or several overlapping tokens, along with the score(s) and the
023: * scope of the original text
024: * @author MAHarwood
025: */
026: public class TokenGroup {
027:
028: private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
029: Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
030: float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
031: int numTokens = 0;
032: int startOffset = 0;
033: int endOffset = 0;
034: float tot;
035:
036: int matchStartOffset, matchEndOffset;
037:
038: void addToken(Token token, float score) {
039: if (numTokens < MAX_NUM_TOKENS_PER_GROUP) {
040: if (numTokens == 0) {
041: startOffset = matchStartOffset = token.startOffset();
042: endOffset = matchEndOffset = token.endOffset();
043: tot += score;
044: } else {
045: startOffset = Math
046: .min(startOffset, token.startOffset());
047: endOffset = Math.max(endOffset, token.endOffset());
048: if (score > 0) {
049: if (tot == 0) {
050: matchStartOffset = token.startOffset();
051: matchEndOffset = token.endOffset();
052: } else {
053: matchStartOffset = Math.min(matchStartOffset,
054: token.startOffset());
055: matchEndOffset = Math.max(matchEndOffset, token
056: .endOffset());
057: }
058: tot += score;
059: }
060: }
061: tokens[numTokens] = token;
062: scores[numTokens] = score;
063: numTokens++;
064: }
065: }
066:
067: boolean isDistinct(Token token) {
068: return token.startOffset() >= endOffset;
069: }
070:
071: void clear() {
072: numTokens = 0;
073: tot = 0;
074: }
075:
076: /**
077: *
078: * @param index a value between 0 and numTokens -1
079: * @return the "n"th token
080: */
081: public Token getToken(int index) {
082: return tokens[index];
083: }
084:
085: /**
086: *
087: * @param index a value between 0 and numTokens -1
088: * @return the "n"th score
089: */
090: public float getScore(int index) {
091: return scores[index];
092: }
093:
094: /**
095: * @return the end position in the original text
096: */
097: public int getEndOffset() {
098: return endOffset;
099: }
100:
101: /**
102: * @return the number of tokens in this group
103: */
104: public int getNumTokens() {
105: return numTokens;
106: }
107:
108: /**
109: * @return the start position in the original text
110: */
111: public int getStartOffset() {
112: return startOffset;
113: }
114:
115: /**
116: * @return all tokens' scores summed up
117: */
118: public float getTotalScore() {
119: return tot;
120: }
121: }
|