001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021: import java.io.UnsupportedEncodingException;
022: import java.util.HashMap;
023: import java.util.Map;
024:
025: import org.apache.lucene.analysis.Analyzer;
026: import org.apache.lucene.analysis.WhitespaceAnalyzer;
027: import org.apache.lucene.document.Document;
028: import org.apache.lucene.document.Field;
029: import org.apache.lucene.document.Fieldable;
030: import org.apache.lucene.search.Similarity;
031: import org.apache.lucene.store.Directory;
032:
033: class DocHelper {
034: public static final String FIELD_1_TEXT = "field one text";
035: public static final String TEXT_FIELD_1_KEY = "textField1";
036: public static Field textField1 = new Field(TEXT_FIELD_1_KEY,
037: FIELD_1_TEXT, Field.Store.YES, Field.Index.TOKENIZED,
038: Field.TermVector.NO);
039:
040: public static final String FIELD_2_TEXT = "field field field two text";
041: //Fields will be lexicographically sorted. So, the order is: field, text, two
042: public static final int[] FIELD_2_FREQS = { 3, 1, 1 };
043: public static final String TEXT_FIELD_2_KEY = "textField2";
044: public static Field textField2 = new Field(TEXT_FIELD_2_KEY,
045: FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED,
046: Field.TermVector.WITH_POSITIONS_OFFSETS);
047:
048: public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text";
049: //Fields will be lexicographically sorted. So, the order is: field, text, two
050: public static final int[] COMPRESSED_FIELD_2_FREQS = { 3, 1, 1 };
051: public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2";
052: public static Field compressedTextField2 = new Field(
053: COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT,
054: Field.Store.COMPRESS, Field.Index.TOKENIZED,
055: Field.TermVector.WITH_POSITIONS_OFFSETS);
056:
057: public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
058: public static final String TEXT_FIELD_3_KEY = "textField3";
059: public static Field textField3 = new Field(TEXT_FIELD_3_KEY,
060: FIELD_3_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
061: static {
062: textField3.setOmitNorms(true);
063: }
064:
065: public static final String KEYWORD_TEXT = "Keyword";
066: public static final String KEYWORD_FIELD_KEY = "keyField";
067: public static Field keyField = new Field(KEYWORD_FIELD_KEY,
068: KEYWORD_TEXT, Field.Store.YES, Field.Index.UN_TOKENIZED);
069:
070: public static final String NO_NORMS_TEXT = "omitNormsText";
071: public static final String NO_NORMS_KEY = "omitNorms";
072: public static Field noNormsField = new Field(NO_NORMS_KEY,
073: NO_NORMS_TEXT, Field.Store.YES, Field.Index.NO_NORMS);
074:
075: public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
076: public static final String UNINDEXED_FIELD_KEY = "unIndField";
077: public static Field unIndField = new Field(UNINDEXED_FIELD_KEY,
078: UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
079:
080: public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
081: public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
082: public static Field unStoredField1 = new Field(
083: UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT,
084: Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO);
085:
086: public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
087: public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
088: public static Field unStoredField2 = new Field(
089: UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
090: Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES);
091:
092: public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
093: public static byte[] LAZY_FIELD_BINARY_BYTES;
094: public static Field lazyFieldBinary;
095:
096: public static final String LAZY_FIELD_KEY = "lazyField";
097: public static final String LAZY_FIELD_TEXT = "These are some field bytes";
098: public static Field lazyField = new Field(LAZY_FIELD_KEY,
099: LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
100:
101: public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
102: public static String LARGE_LAZY_FIELD_TEXT;
103: public static Field largeLazyField;
104:
105: //From Issue 509
106: public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
107: public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
108: public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY,
109: FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.TOKENIZED,
110: Field.TermVector.NO);
111:
112: public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
113: //Fields will be lexicographically sorted. So, the order is: field, text, two
114: public static final int[] FIELD_UTF2_FREQS = { 3, 1, 1 };
115: public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
116: public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY,
117: FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.TOKENIZED,
118: Field.TermVector.WITH_POSITIONS_OFFSETS);
119:
120: public static Map nameValues = null;
121:
122: // ordered list of all the fields...
123: // could use LinkedHashMap for this purpose if Java1.4 is OK
124: public static Field[] fields = new Field[] { textField1,
125: textField2, textField3, compressedTextField2, keyField,
126: noNormsField, unIndField, unStoredField1, unStoredField2,
127: textUtfField1, textUtfField2, lazyField, lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last.
128: largeLazyField //placeholder for large field, since this is null. It must always be last
129: };
130:
131: // Map<String fieldName, Fieldable field>
132: public static Map all = new HashMap();
133: public static Map indexed = new HashMap();
134: public static Map stored = new HashMap();
135: public static Map unstored = new HashMap();
136: public static Map unindexed = new HashMap();
137: public static Map termvector = new HashMap();
138: public static Map notermvector = new HashMap();
139: public static Map lazy = new HashMap();
140: public static Map noNorms = new HashMap();
141:
142: static {
143: //Initialize the large Lazy Field
144: StringBuffer buffer = new StringBuffer();
145: for (int i = 0; i < 10000; i++) {
146: buffer
147: .append("Lazily loading lengths of language in lieu of laughing ");
148: }
149:
150: try {
151: LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes"
152: .getBytes("UTF8");
153: } catch (UnsupportedEncodingException e) {
154: }
155: lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY,
156: LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
157: fields[fields.length - 2] = lazyFieldBinary;
158: LARGE_LAZY_FIELD_TEXT = buffer.toString();
159: largeLazyField = new Field(LARGE_LAZY_FIELD_KEY,
160: LARGE_LAZY_FIELD_TEXT, Field.Store.YES,
161: Field.Index.TOKENIZED);
162: fields[fields.length - 1] = largeLazyField;
163: for (int i = 0; i < fields.length; i++) {
164: Fieldable f = fields[i];
165: add(all, f);
166: if (f.isIndexed())
167: add(indexed, f);
168: else
169: add(unindexed, f);
170: if (f.isTermVectorStored())
171: add(termvector, f);
172: if (f.isIndexed() && !f.isTermVectorStored())
173: add(notermvector, f);
174: if (f.isStored())
175: add(stored, f);
176: else
177: add(unstored, f);
178: if (f.getOmitNorms())
179: add(noNorms, f);
180: if (f.isLazy())
181: add(lazy, f);
182: }
183: }
184:
185: private static void add(Map map, Fieldable field) {
186: map.put(field.name(), field);
187: }
188:
189: static {
190: nameValues = new HashMap();
191: nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
192: nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
193: nameValues.put(COMPRESSED_TEXT_FIELD_2_KEY,
194: FIELD_2_COMPRESSED_TEXT);
195: nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
196: nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
197: nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
198: nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
199: nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
200: nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
201: nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
202: nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
203: nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
204: nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
205: nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
206: }
207:
208: /**
209: * Adds the fields above to a document
210: * @param doc The document to write
211: */
212: public static void setupDoc(Document doc) {
213: for (int i = 0; i < fields.length; i++) {
214: doc.add(fields[i]);
215: }
216: }
217:
218: /**
219: * Writes the document to the directory using a segment
220: * named "test"; returns the SegmentInfo describing the new
221: * segment
222: * @param dir
223: * @param doc
224: * @throws IOException
225: */
226: public static SegmentInfo writeDoc(Directory dir, Document doc)
227: throws IOException {
228: return writeDoc(dir, new WhitespaceAnalyzer(), Similarity
229: .getDefault(), doc);
230: }
231:
232: /**
233: * Writes the document to the directory using the analyzer
234: * and the similarity score; returns the SegmentInfo
235: * describing the new segment
236: * @param dir
237: * @param analyzer
238: * @param similarity
239: * @param doc
240: * @throws IOException
241: */
242: public static SegmentInfo writeDoc(Directory dir,
243: Analyzer analyzer, Similarity similarity, Document doc)
244: throws IOException {
245: IndexWriter writer = new IndexWriter(dir, analyzer);
246: writer.setSimilarity(similarity);
247: //writer.setUseCompoundFile(false);
248: writer.addDocument(doc);
249: writer.flush();
250: SegmentInfo info = writer.newestSegment();
251: writer.close();
252: return info;
253: }
254:
255: public static int numFields(Document doc) {
256: return doc.getFields().size();
257: }
258: }
|