01: package org.apache.lucene.search.highlight;
02:
03: /**
04: * Copyright 2002-2004 The Apache Software Foundation
05: *
06: * Licensed under the Apache License, Version 2.0 (the "License");
07: * you may not use this file except in compliance with the License.
08: * You may obtain a copy of the License at
09: *
10: * http://www.apache.org/licenses/LICENSE-2.0
11: *
12: * Unless required by applicable law or agreed to in writing, software
13: * distributed under the License is distributed on an "AS IS" BASIS,
14: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15: * See the License for the specific language governing permissions and
16: * limitations under the License.
17: */
18:
19: import org.apache.lucene.analysis.Token;
20:
21: /**
22: * {@link Fragmenter} implementation which breaks text up into same-size
23: * fragments with no concerns over spotting sentence boundaries.
24: *
25: * @author mark@searcharea.co.uk
26: */
27: public class SimpleFragmenter implements Fragmenter {
28: private static final int DEFAULT_FRAGMENT_SIZE = 100;
29:
30: private int currentNumFrags;
31:
32: private int fragmentSize;
33:
34: public SimpleFragmenter() {
35: this (DEFAULT_FRAGMENT_SIZE);
36: }
37:
38: /**
39: * @param fragmentSize
40: * size in bytes of each fragment
41: */
42: public SimpleFragmenter(int fragmentSize) {
43: this .fragmentSize = fragmentSize;
44: }
45:
46: /*
47: * (non-Javadoc)
48: *
49: * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
50: */
51: public void start(String originalText) {
52: currentNumFrags = 1;
53: }
54:
55: /*
56: * (non-Javadoc)
57: *
58: * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
59: */
60: public boolean isNewFragment(Token token) {
61: boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags);
62: if (isNewFrag) {
63: currentNumFrags++;
64: }
65: return isNewFrag;
66: }
67:
68: /**
69: * @return size in bytes of each fragment
70: */
71: public int getFragmentSize() {
72: return fragmentSize;
73: }
74:
75: /**
76: * @param size
77: * size in bytes of each fragment
78: */
79: public void setFragmentSize(int size) {
80: fragmentSize = size;
81: }
82:
83: }
|