001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.analysis;
017:
018: import org.apache.lucene.analysis.Token;
019: import org.apache.lucene.analysis.TokenStream;
020:
021: import java.io.IOException;
022: import java.util.LinkedList;
023: import java.util.List;
024:
025: /**
026: * Handles input and output buffering of TokenStream
027: *
028: * <pre>
029: * // Example of a class implementing the rule "A" "B" => "Q" "B"
030: * class MyTokenStream extends BufferedTokenStream {
031: * public MyTokenStream(TokenStream input) {super(input);}
032: * protected Token process(Token t) throws IOException {
033: * if ("A".equals(t.termText())) {
034: * Token t2 = read();
035: * if (t2!=null && "B".equals(t2.termText())) t.setTermText("Q");
036: * if (t2!=null) pushBack(t2);
037: * }
038: * return t;
039: * }
040: * }
041: *
042: * // Example of a class implementing "A" "B" => "A" "A" "B"
043: * class MyTokenStream extends BufferedTokenStream {
044: * public MyTokenStream(TokenStream input) {super(input);}
045: * protected Token process(Token t) throws IOException {
046: * if ("A".equals(t.termText()) && "B".equals(peek(1).termText()))
047: * write(t);
048: * return t;
049: * }
050: * }
051: * </pre>
052: *
053: *
054: * @author yonik
055: * @version $Id$
056: */
057: public abstract class BufferedTokenStream extends TokenStream {
058: // in the futute, might be faster if we implemented as an array based CircularQueue
059: private final LinkedList<Token> inQueue = new LinkedList<Token>();
060: private final LinkedList<Token> outQueue = new LinkedList<Token>();
061: private final TokenStream input;
062:
063: public BufferedTokenStream(TokenStream input) {
064: this .input = input;
065: }
066:
067: /**
068: * Process a token. Subclasses may read more tokens from the input stream,
069: * write more tokens to the output stream, or simply return the next token
070: * to be output. Subclasses may return null if the token is to be dropped.
071: * If a subclass writes tokens to the output stream and returns a
072: * non-null Token, the returned Token is considered to be at the head of
073: * the token output stream.
074: */
075: protected abstract Token process(Token t) throws IOException;
076:
077: public final Token next() throws IOException {
078: while (true) {
079: if (!outQueue.isEmpty())
080: return outQueue.removeFirst();
081: Token t = read();
082: if (null == t)
083: return null;
084: Token out = process(t);
085: if (null != out)
086: return out;
087: // loop back to top in case process() put something on the output queue
088: }
089: }
090:
091: /**
092: * Read a token from the buffered input stream.
093: * @return null at EOS
094: */
095: protected Token read() throws IOException {
096: if (inQueue.isEmpty()) {
097: Token t = input.next();
098: return t;
099: }
100: return inQueue.removeFirst();
101: }
102:
103: /**
104: * Push a token back into the buffered input stream, such that it will
105: * be returned by a future call to <code>read()</code>
106: */
107: protected void pushBack(Token t) {
108: inQueue.addFirst(t);
109: }
110:
111: /**
112: * Peek n tokens ahead in the buffered input stream, without modifying
113: * the stream.
114: * @param n Number of tokens into the input stream to peek, 1 based ...
115: * 0 is invalid
116: * @return a Token which exists in the input stream, any modifications
117: * made to this Token will be "real" if/when the Token is
118: * <code>read()</code> from the stream.
119: */
120: protected Token peek(int n) throws IOException {
121: int fillCount = n - inQueue.size();
122: for (int i = 0; i < fillCount; i++) {
123: Token t = input.next();
124: if (null == t)
125: return null;
126: inQueue.addLast(t);
127: }
128: return inQueue.get(n - 1);
129: }
130:
131: /**
132: * Write a token to the buffered output stream
133: */
134: protected void write(Token t) {
135: outQueue.addLast(t);
136: }
137:
138: /**
139: * Provides direct Iterator access to the buffered output stream.
140: * Modifying any token in this Iterator will affect the resulting stream.
141: */
142: protected Iterable<Token> output() {
143: return outQueue;
144: }
145:
146: }
|