01: package org.apache.lucene.search.highlight;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import org.apache.lucene.analysis.Token;
21:
22: /**
23: * {@link Fragmenter} implementation which breaks text up into same-size
24: * fragments with no concerns over spotting sentence boundaries.
25: * @author mark@searcharea.co.uk
26: */
27: public class SimpleFragmenter implements Fragmenter {
28: private static final int DEFAULT_FRAGMENT_SIZE = 100;
29: private int currentNumFrags;
30: private int fragmentSize;
31:
32: public SimpleFragmenter() {
33: this (DEFAULT_FRAGMENT_SIZE);
34: }
35:
36: /**
37: *
38: * @param fragmentSize size in bytes of each fragment
39: */
40: public SimpleFragmenter(int fragmentSize) {
41: this .fragmentSize = fragmentSize;
42: }
43:
44: /* (non-Javadoc)
45: * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
46: */
47: public void start(String originalText) {
48: currentNumFrags = 1;
49: }
50:
51: /* (non-Javadoc)
52: * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
53: */
54: public boolean isNewFragment(Token token) {
55: boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags);
56: if (isNewFrag) {
57: currentNumFrags++;
58: }
59: return isNewFrag;
60: }
61:
62: /**
63: * @return size in bytes of each fragment
64: */
65: public int getFragmentSize() {
66: return fragmentSize;
67: }
68:
69: /**
70: * @param size size in bytes of each fragment
71: */
72: public void setFragmentSize(int size) {
73: fragmentSize = size;
74: }
75:
76: }
|