001: package org.apache.lucene.benchmark.byTask;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.analysis.Analyzer;
021: import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
022: import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
023: import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
024: import org.apache.lucene.benchmark.byTask.stats.Points;
025: import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
026: import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
027: import org.apache.lucene.benchmark.byTask.utils.Config;
028: import org.apache.lucene.benchmark.byTask.utils.FileUtils;
029: import org.apache.lucene.index.IndexReader;
030: import org.apache.lucene.index.IndexWriter;
031: import org.apache.lucene.store.Directory;
032: import org.apache.lucene.store.FSDirectory;
033: import org.apache.lucene.store.RAMDirectory;
034:
035: import java.io.File;
036: import java.util.HashMap;
037: import java.util.Iterator;
038:
039: /**
040: * Data maintained by a performance test run.
041: * <p>
042: * Data includes:
043: * <ul>
044: * <li>Configuration.
045: * <li>Directory, Writer, Reader.
046: * <li>Docmaker and a few instances of QueryMaker.
047: * <li>Analyzer.
048: * <li>Statistics data which updated during the run.
049: * </ul>
050: * Config properties: work.dir=<path to root of docs and index dirs| Default: work>
051: * </ul>
052: */
053: public class PerfRunData {
054:
055: private Points points;
056:
057: // objects used during performance test run
058: // directory, analyzer, docMaker - created at startup.
059: // reader, writer, searcher - maintained by basic tasks.
060: private Directory directory;
061: private Analyzer analyzer;
062: private DocMaker docMaker;
063: private HTMLParser htmlParser;
064:
065: // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
066: private HashMap readTaskQueryMaker;
067: private Class qmkrClass;
068:
069: private IndexReader indexReader;
070: private IndexWriter indexWriter;
071: private Config config;
072: private long startTimeMillis;
073:
074: // constructor
075: public PerfRunData(Config config) throws Exception {
076: this .config = config;
077: // analyzer (default is standard analyzer)
078: analyzer = (Analyzer) Class
079: .forName(
080: config
081: .get("analyzer",
082: "org.apache.lucene.analysis.standard.StandardAnalyzer"))
083: .newInstance();
084: // doc maker
085: docMaker = (DocMaker) Class
086: .forName(
087: config
088: .get("doc.maker",
089: "org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker"))
090: .newInstance();
091: docMaker.setConfig(config);
092: // query makers
093: readTaskQueryMaker = new HashMap();
094: qmkrClass = Class
095: .forName(config
096: .get("query.maker",
097: "org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
098: // html parser, used for some doc makers
099: htmlParser = (HTMLParser) Class
100: .forName(
101: config
102: .get("html.parser",
103: "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser"))
104: .newInstance();
105: docMaker.setHTMLParser(htmlParser);
106:
107: // index stuff
108: reinit(false);
109:
110: // statistic points
111: points = new Points(config);
112:
113: if (Boolean.valueOf(config.get("log.queries", "false"))
114: .booleanValue()) {
115: System.out.println("------------> queries:");
116: System.out.println(getQueryMaker(new SearchTask(this ))
117: .printQueries());
118: }
119:
120: }
121:
122: // clean old stuff, reopen
123: public void reinit(boolean eraseIndex) throws Exception {
124:
125: // cleanup index
126: if (indexWriter != null) {
127: indexWriter.close();
128: indexWriter = null;
129: }
130: if (indexReader != null) {
131: indexReader.close();
132: indexReader = null;
133: }
134: if (directory != null) {
135: directory.close();
136: }
137:
138: // directory (default is ram-dir).
139: if ("FSDirectory".equals(config
140: .get("directory", "RAMDirectory"))) {
141: File workDir = new File(config.get("work.dir", "work"));
142: File indexDir = new File(workDir, "index");
143: if (eraseIndex && indexDir.exists()) {
144: FileUtils.fullyDelete(indexDir);
145: }
146: indexDir.mkdirs();
147: directory = FSDirectory.getDirectory(indexDir);
148: } else {
149: directory = new RAMDirectory();
150: }
151:
152: // inputs
153: resetInputs();
154:
155: // release unused stuff
156: System.runFinalization();
157: System.gc();
158: }
159:
160: public long setStartTimeMillis() {
161: startTimeMillis = System.currentTimeMillis();
162: return startTimeMillis;
163: }
164:
165: /**
166: * @return Start time in milliseconds
167: */
168: public long getStartTimeMillis() {
169: return startTimeMillis;
170: }
171:
172: /**
173: * @return Returns the points.
174: */
175: public Points getPoints() {
176: return points;
177: }
178:
179: /**
180: * @return Returns the directory.
181: */
182: public Directory getDirectory() {
183: return directory;
184: }
185:
186: /**
187: * @param directory The directory to set.
188: */
189: public void setDirectory(Directory directory) {
190: this .directory = directory;
191: }
192:
193: /**
194: * @return Returns the indexReader.
195: */
196: public IndexReader getIndexReader() {
197: return indexReader;
198: }
199:
200: /**
201: * @param indexReader The indexReader to set.
202: */
203: public void setIndexReader(IndexReader indexReader) {
204: this .indexReader = indexReader;
205: }
206:
207: /**
208: * @return Returns the indexWriter.
209: */
210: public IndexWriter getIndexWriter() {
211: return indexWriter;
212: }
213:
214: /**
215: * @param indexWriter The indexWriter to set.
216: */
217: public void setIndexWriter(IndexWriter indexWriter) {
218: this .indexWriter = indexWriter;
219: }
220:
221: /**
222: * @return Returns the anlyzer.
223: */
224: public Analyzer getAnalyzer() {
225: return analyzer;
226: }
227:
228: public void setAnalyzer(Analyzer analyzer) {
229: this .analyzer = analyzer;
230: }
231:
232: /**
233: * @return Returns the docMaker.
234: */
235: public DocMaker getDocMaker() {
236: return docMaker;
237: }
238:
239: /**
240: * @return Returns the config.
241: */
242: public Config getConfig() {
243: return config;
244: }
245:
246: public void resetInputs() {
247: docMaker.resetInputs();
248: Iterator it = readTaskQueryMaker.values().iterator();
249: while (it.hasNext()) {
250: ((QueryMaker) it.next()).resetInputs();
251: }
252: }
253:
254: /**
255: * @return Returns the queryMaker by read task type (class)
256: */
257: public QueryMaker getQueryMaker(ReadTask readTask) {
258: // mapping the query maker by task class allows extending/adding new search/read tasks
259: // without needing to modify this class.
260: Class readTaskClass = readTask.getClass();
261: QueryMaker qm = (QueryMaker) readTaskQueryMaker
262: .get(readTaskClass);
263: if (qm == null) {
264: try {
265: qm = (QueryMaker) qmkrClass.newInstance();
266: qm.setConfig(config);
267: } catch (Exception e) {
268: throw new RuntimeException(e);
269: }
270: readTaskQueryMaker.put(readTaskClass, qm);
271: }
272: return qm;
273: }
274:
275: /**
276: * @return Returns the htmlParser.
277: */
278: public HTMLParser getHtmlParser() {
279: return htmlParser;
280: }
281:
282: }
|