001: package org.apache.lucene.document;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.index.IndexReader;
021: import org.apache.lucene.search.Hits;
022: import org.apache.lucene.search.Searcher;
023:
024: import java.util.*; // for javadoc
025:
026: /** Documents are the unit of indexing and search.
027: *
028: * A Document is a set of fields. Each field has a name and a textual value.
029: * A field may be {@link Fieldable#isStored() stored} with the document, in which
030: * case it is returned with search hits on the document. Thus each document
031: * should typically contain one or more stored fields which uniquely identify
032: * it.
033: *
034: * <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
035: * <i>not</i> available in documents retrieved from the index, e.g. with {@link
036: * Hits#doc(int)}, {@link Searcher#doc(int)} or {@link
037: * IndexReader#document(int)}.
038: */
039:
040: public final class Document implements java.io.Serializable {
041: List fields = new ArrayList();
042: private float boost = 1.0f;
043:
044: /** Constructs a new document with no fields. */
045: public Document() {
046: }
047:
048: /** Sets a boost factor for hits on any field of this document. This value
049: * will be multiplied into the score of all hits on this document.
050: *
051: * <p>The default value is 1.0.
052: *
053: * <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
054: * each field in this document. Thus, this method in effect sets a default
055: * boost for the fields of this document.
056: *
057: * @see Fieldable#setBoost(float)
058: */
059: public void setBoost(float boost) {
060: this .boost = boost;
061: }
062:
063: /** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
064: *
065: * <p>Note that once a document is indexed this value is no longer available
066: * from the index. At search time, for retrieved documents, this method always
067: * returns 1. This however does not mean that the boost value set at indexing
068: * time was ignored - it was just combined with other indexing time factors and
069: * stored elsewhere, for better indexing and search performance. (For more
070: * information see the "norm(t,d)" part of the scoring formula in
071: * {@link org.apache.lucene.search.Similarity Similarity}.)
072: *
073: * @see #setBoost(float)
074: */
075: public float getBoost() {
076: return boost;
077: }
078:
079: /**
080: * <p>Adds a field to a document. Several fields may be added with
081: * the same name. In this case, if the fields are indexed, their text is
082: * treated as though appended for the purposes of search.</p>
083: * <p> Note that add like the removeField(s) methods only makes sense
084: * prior to adding a document to an index. These methods cannot
085: * be used to change the content of an existing index! In order to achieve this,
086: * a document has to be deleted from an index and a new changed version of that
087: * document has to be added.</p>
088: */
089: public final void add(Fieldable field) {
090: fields.add(field);
091: }
092:
093: /**
094: * <p>Removes field with the specified name from the document.
095: * If multiple fields exist with this name, this method removes the first field that has been added.
096: * If there is no field with the specified name, the document remains unchanged.</p>
097: * <p> Note that the removeField(s) methods like the add method only make sense
098: * prior to adding a document to an index. These methods cannot
099: * be used to change the content of an existing index! In order to achieve this,
100: * a document has to be deleted from an index and a new changed version of that
101: * document has to be added.</p>
102: */
103: public final void removeField(String name) {
104: Iterator it = fields.iterator();
105: while (it.hasNext()) {
106: Fieldable field = (Fieldable) it.next();
107: if (field.name().equals(name)) {
108: it.remove();
109: return;
110: }
111: }
112: }
113:
114: /**
115: * <p>Removes all fields with the given name from the document.
116: * If there is no field with the specified name, the document remains unchanged.</p>
117: * <p> Note that the removeField(s) methods like the add method only make sense
118: * prior to adding a document to an index. These methods cannot
119: * be used to change the content of an existing index! In order to achieve this,
120: * a document has to be deleted from an index and a new changed version of that
121: * document has to be added.</p>
122: */
123: public final void removeFields(String name) {
124: Iterator it = fields.iterator();
125: while (it.hasNext()) {
126: Fieldable field = (Fieldable) it.next();
127: if (field.name().equals(name)) {
128: it.remove();
129: }
130: }
131: }
132:
133: /** Returns a field with the given name if any exist in this document, or
134: * null. If multiple fields exists with this name, this method returns the
135: * first value added.
136: * Do not use this method with lazy loaded fields.
137: */
138: public final Field getField(String name) {
139: for (int i = 0; i < fields.size(); i++) {
140: Field field = (Field) fields.get(i);
141: if (field.name().equals(name))
142: return field;
143: }
144: return null;
145: }
146:
147: /** Returns a field with the given name if any exist in this document, or
148: * null. If multiple fields exists with this name, this method returns the
149: * first value added.
150: */
151: public Fieldable getFieldable(String name) {
152: for (int i = 0; i < fields.size(); i++) {
153: Fieldable field = (Fieldable) fields.get(i);
154: if (field.name().equals(name))
155: return field;
156: }
157: return null;
158: }
159:
160: /** Returns the string value of the field with the given name if any exist in
161: * this document, or null. If multiple fields exist with this name, this
162: * method returns the first value added. If only binary fields with this name
163: * exist, returns null.
164: */
165: public final String get(String name) {
166: for (int i = 0; i < fields.size(); i++) {
167: Fieldable field = (Fieldable) fields.get(i);
168: if (field.name().equals(name) && (!field.isBinary()))
169: return field.stringValue();
170: }
171: return null;
172: }
173:
174: /** Returns an Enumeration of all the fields in a document.
175: * @deprecated use {@link #getFields()} instead
176: */
177: public final Enumeration fields() {
178: return new Enumeration() {
179: final Iterator iter = fields.iterator();
180:
181: public boolean hasMoreElements() {
182: return iter.hasNext();
183: }
184:
185: public Object nextElement() {
186: return iter.next();
187: }
188: };
189: }
190:
191: /** Returns a List of all the fields in a document.
192: * <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
193: * <i>not</i> available in documents retrieved from the index, e.g. with {@link
194: * Hits#doc(int)}, {@link Searcher#doc(int)} or {@link IndexReader#document(int)}.
195: */
196: public final List getFields() {
197: return fields;
198: }
199:
200: /**
201: * Returns an array of {@link Field}s with the given name.
202: * This method can return <code>null</code>.
203: * Do not use with lazy loaded fields.
204: *
205: * @param name the name of the field
206: * @return a <code>Field[]</code> array
207: */
208: public final Field[] getFields(String name) {
209: List result = new ArrayList();
210: for (int i = 0; i < fields.size(); i++) {
211: Field field = (Field) fields.get(i);
212: if (field.name().equals(name)) {
213: result.add(field);
214: }
215: }
216:
217: if (result.size() == 0)
218: return null;
219:
220: return (Field[]) result.toArray(new Field[result.size()]);
221: }
222:
223: /**
224: * Returns an array of {@link Fieldable}s with the given name.
225: * This method can return <code>null</code>.
226: *
227: * @param name the name of the field
228: * @return a <code>Fieldable[]</code> array or <code>null</code>
229: */
230: public Fieldable[] getFieldables(String name) {
231: List result = new ArrayList();
232: for (int i = 0; i < fields.size(); i++) {
233: Fieldable field = (Fieldable) fields.get(i);
234: if (field.name().equals(name)) {
235: result.add(field);
236: }
237: }
238:
239: if (result.size() == 0)
240: return null;
241:
242: return (Fieldable[]) result
243: .toArray(new Fieldable[result.size()]);
244: }
245:
246: /**
247: * Returns an array of values of the field specified as the method parameter.
248: * This method can return <code>null</code>.
249: *
250: * @param name the name of the field
251: * @return a <code>String[]</code> of field values or <code>null</code>
252: */
253: public final String[] getValues(String name) {
254: List result = new ArrayList();
255: for (int i = 0; i < fields.size(); i++) {
256: Fieldable field = (Fieldable) fields.get(i);
257: if (field.name().equals(name) && (!field.isBinary()))
258: result.add(field.stringValue());
259: }
260:
261: if (result.size() == 0)
262: return null;
263:
264: return (String[]) result.toArray(new String[result.size()]);
265: }
266:
267: /**
268: * Returns an array of byte arrays for of the fields that have the name specified
269: * as the method parameter. This method will return <code>null</code> if no
270: * binary fields with the specified name are available.
271: *
272: * @param name the name of the field
273: * @return a <code>byte[][]</code> of binary field values or <code>null</code>
274: */
275: public final byte[][] getBinaryValues(String name) {
276: List result = new ArrayList();
277: for (int i = 0; i < fields.size(); i++) {
278: Fieldable field = (Fieldable) fields.get(i);
279: if (field.name().equals(name) && (field.isBinary()))
280: result.add(field.binaryValue());
281: }
282:
283: if (result.size() == 0)
284: return null;
285:
286: return (byte[][]) result.toArray(new byte[result.size()][]);
287: }
288:
289: /**
290: * Returns an array of bytes for the first (or only) field that has the name
291: * specified as the method parameter. This method will return <code>null</code>
292: * if no binary fields with the specified name are available.
293: * There may be non-binary fields with the same name.
294: *
295: * @param name the name of the field.
296: * @return a <code>byte[]</code> containing the binary field value or <code>null</code>
297: */
298: public final byte[] getBinaryValue(String name) {
299: for (int i = 0; i < fields.size(); i++) {
300: Fieldable field = (Fieldable) fields.get(i);
301: if (field.name().equals(name) && (field.isBinary()))
302: return field.binaryValue();
303: }
304: return null;
305: }
306:
307: /** Prints the fields of a document for human consumption. */
308: public final String toString() {
309: StringBuffer buffer = new StringBuffer();
310: buffer.append("Document<");
311: for (int i = 0; i < fields.size(); i++) {
312: Fieldable field = (Fieldable) fields.get(i);
313: buffer.append(field.toString());
314: if (i != fields.size() - 1)
315: buffer.append(" ");
316: }
317: buffer.append(">");
318: return buffer.toString();
319: }
320: }
|