01: package org.apache.lucene.analysis;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import org.apache.lucene.index.Payload;
21:
22: import java.io.IOException;
23:
24: /** A TokenStream enumerates the sequence of tokens, either from
25: fields of a document or from query text.
26: <p>
27: This is an abstract class. Concrete subclasses are:
28: <ul>
29: <li>{@link Tokenizer}, a TokenStream
30: whose input is a Reader; and
31: <li>{@link TokenFilter}, a TokenStream
32: whose input is another TokenStream.
33: </ul>
34: NOTE: subclasses must override at least one of {@link
35: #next()} or {@link #next(Token)}.
36: */
37:
38: public abstract class TokenStream {
39:
40: /** Returns the next token in the stream, or null at EOS.
41: * The returned Token is a "full private copy" (not
42: * re-used across calls to next()) but will be slower
43: * than calling {@link #next(Token)} instead.. */
44: public Token next() throws IOException {
45: Token result = next(new Token());
46:
47: if (result != null) {
48: Payload p = result.getPayload();
49: if (p != null) {
50: result.setPayload((Payload) p.clone());
51: }
52: }
53:
54: return result;
55: }
56:
57: /** Returns the next token in the stream, or null at EOS.
58: * When possible, the input Token should be used as the
59: * returned Token (this gives fastest tokenization
60: * performance), but this is not required and a new Token
61: * may be returned. Callers may re-use a single Token
62: * instance for successive calls to this method.
63: * <p>
64: * This implicitly defines a "contract" between
65: * consumers (callers of this method) and
66: * producers (implementations of this method
67: * that are the source for tokens):
68: * <ul>
69: * <li>A consumer must fully consume the previously
70: * returned Token before calling this method again.</li>
71: * <li>A producer must call {@link Token#clear()}
72: * before setting the fields in it & returning it</li>
73: * </ul>
74: * Note that a {@link TokenFilter} is considered a consumer.
75: * @param result a Token that may or may not be used to return
76: * @return next token in the stream or null if end-of-stream was hit
77: */
78: public Token next(Token result) throws IOException {
79: return next();
80: }
81:
82: /** Resets this stream to the beginning. This is an
83: * optional operation, so subclasses may or may not
84: * implement this method. Reset() is not needed for
85: * the standard indexing process. However, if the Tokens
86: * of a TokenStream are intended to be consumed more than
87: * once, it is necessary to implement reset().
88: */
89: public void reset() throws IOException {
90: }
91:
92: /** Releases resources associated with this stream. */
93: public void close() throws IOException {
94: }
95: }
|