01: package org.apache.lucene.search.highlight;
02:
03: /**
04: * Copyright 2002-2004 The Apache Software Foundation
05: *
06: * Licensed under the Apache License, Version 2.0 (the "License");
07: * you may not use this file except in compliance with the License.
08: * You may obtain a copy of the License at
09: *
10: * http://www.apache.org/licenses/LICENSE-2.0
11: *
12: * Unless required by applicable law or agreed to in writing, software
13: * distributed under the License is distributed on an "AS IS" BASIS,
14: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15: * See the License for the specific language governing permissions and
16: * limitations under the License.
17: */
18:
19: import org.apache.lucene.analysis.Token;
20:
21: /**
22: * {@link Fragmenter} implementation which breaks text up into same-size
23: * fragments with no concerns over spotting sentence boundaries.
24: * @author mark@searcharea.co.uk
25: */
26: public class SimpleFragmenter implements Fragmenter {
27: private static final int DEFAULT_FRAGMENT_SIZE = 100;
28: private int currentNumFrags;
29: private int fragmentSize;
30:
31: public SimpleFragmenter() {
32: this (DEFAULT_FRAGMENT_SIZE);
33: }
34:
35: /**
36: *
37: * @param fragmentSize size in bytes of each fragment
38: */
39: public SimpleFragmenter(int fragmentSize) {
40: this .fragmentSize = fragmentSize;
41: }
42:
43: /* (non-Javadoc)
44: * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
45: */
46: public void start(String originalText) {
47: currentNumFrags = 1;
48: }
49:
50: /* (non-Javadoc)
51: * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
52: */
53: public boolean isNewFragment(Token token) {
54: boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags);
55: if (isNewFrag) {
56: currentNumFrags++;
57: }
58: return isNewFrag;
59: }
60:
61: /**
62: * @return size in bytes of each fragment
63: */
64: public int getFragmentSize() {
65: return fragmentSize;
66: }
67:
68: /**
69: * @param size size in bytes of each fragment
70: */
71: public void setFragmentSize(int size) {
72: fragmentSize = size;
73: }
74:
75: }
|