Source Code Cross Referenced for LuceneIndexTransformer.java in » Content-Management-System » apache-lenya-2.0 » org » apache » cocoon » transformation » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Content Management System » apache lenya 2.0 » org.apache.cocoon.transformation
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         * Licensed to the Apache Software Foundation (ASF) under one or more
003:         * contributor license agreements.  See the NOTICE file distributed with
004:         * this work for additional information regarding copyright ownership.
005:         * The ASF licenses this file to You under the Apache License, Version 2.0
006:         * (the "License"); you may not use this file except in compliance with
007:         * the License.  You may obtain a copy of the License at
008:         * 
009:         *      http://www.apache.org/licenses/LICENSE-2.0
010:         * 
011:         * Unless required by applicable law or agreed to in writing, software
012:         * distributed under the License is distributed on an "AS IS" BASIS,
013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:         * See the License for the specific language governing permissions and
015:         * limitations under the License.
016:         */
017:        package org.apache.cocoon.transformation;
018:
019:        import java.io.File;
020:        import java.io.IOException;
021:        import java.io.Serializable;
022:        import java.util.Map;
023:        import java.util.Stack;
024:
025:        import org.apache.avalon.framework.configuration.Configurable;
026:        import org.apache.avalon.framework.configuration.Configuration;
027:        import org.apache.avalon.framework.configuration.ConfigurationException;
028:        import org.apache.avalon.framework.context.Context;
029:        import org.apache.avalon.framework.context.ContextException;
030:        import org.apache.avalon.framework.context.Contextualizable;
031:        import org.apache.avalon.framework.parameters.Parameters;
032:
033:        import org.apache.cocoon.Constants;
034:        import org.apache.cocoon.ProcessingException;
035:        import org.apache.cocoon.caching.CacheableProcessingComponent;
036:        import org.apache.cocoon.components.search.LuceneCocoonHelper;
037:        import org.apache.cocoon.components.search.LuceneXMLIndexer;
038:        import org.apache.cocoon.environment.SourceResolver;
039:        import org.apache.commons.lang.BooleanUtils;
040:        import org.apache.excalibur.source.SourceValidity;
041:        import org.apache.excalibur.source.impl.validity.NOPValidity;
042:
043:        import org.apache.lucene.analysis.Analyzer;
044:        import org.apache.lucene.document.Document;
045:        import org.apache.lucene.document.Field;
046:        import org.apache.lucene.index.IndexWriter;
047:        import org.apache.lucene.index.IndexReader;
048:        import org.apache.lucene.index.Term;
049:        import org.apache.lucene.store.Directory;
050:        import org.xml.sax.Attributes;
051:        import org.xml.sax.SAXException;
052:        import org.xml.sax.helpers.AttributesImpl;
053:
054:        /**
055:         * <p style="font-weight: bold;">A lucene index creation transformer.</p>
056:         * <p>This transformer reads a document with elements in the namespace 
057:         * <code>http://apache.org/cocoon/lucene/1.0</code>, and creates a new Lucene Index,
058:         * or updates an existing one.</p>
059:         * <p>It has several parameters which can be set in the sitemap component configuration or as 
060:         * parameters to the transformation step in the pipeline, or finally as attributes of the root element
061:         * in the source XML document. The source document over-rides the transformation parameters, 
062:         * which in turn over-ride any configuration parameters.</p>
063:         * <dl>
064:         * <dt>
065:         * <dt style="font-weight: bold;">directory</dt>
066:         * <dd><p>Location of directory where index files are stored. 
067:         * This path is relative to the Cocoon work directory</p></dd>
068:         * <dt style="font-weight: bold;">create</dt>
069:         * <dd><p>This attribute controls whether the index is recreated.  </p>
070:         *    <ul><li><p>If create = "false" and the index already exists then the index will be updated. 
071:         *    Any documents which had already been indexed will be removed from the index and reinserted.</p></li>
072:         *    <li><p>If the index does not exist then it will be created even if <code>create</code>="false".</p></li>
073:         *    <li><p>If <code>create</code>="true" then any existing index will be destroyed and a new index created. 
074:         *     If you are rebuilding your entire index then you should set <code>create</code>="true" because the 
075:         *     indexer doesn't need to remove old documents from the index, so it will be faster.</p></li></ul>
076:         * </dd>
077:         * <dt style="font-weight: bold;">max-field-length</dt>
078:         * <dd><p>Maximum number of terms to index in a field (as far as the index is concerned,
079:         *    the document will effectively be truncated at this point. The default value, 10k, may not be sufficient for large documents.</p></dd>
080:         * <dt style="font-weight: bold;">analyzer</dt>
081:         * <dd><p>Class name of the Lucene text analyzer to use. Typically depends on the language of the text being indexed.
082:         * See the Lucene documentation for more information.</p></dd>
083:         * <dt style="font-weight: bold;">merge-factor</dt>
084:         * <dd>Determines how often segment indices are merged. See the Lucene documentation for more information.</dd>
085:         * </dl>
086:         * <dl>
087:         * <dt style="font-weight: bold;">A simple example of the input:</dt>
088:         * <dd>
089:         * <pre>&lt;?xml version="1.0" encoding="UTF-8"?&gt;
090:         * &lt;lucene:index xmlns:lucene="http://apache.org/cocoon/lucene/1.0" 
091:         *     merge-factor="20" 
092:         *     create="false" 
093:         *     directory="index" 
094:         *     max-field-length="10000"
095:         *     analyzer="org.apache.lucene.analysis.standard.StandardAnalyzer"&gt;
096:         *     &lt;lucene:document url="a.html"&gt;
097:         *             &lt;documentTitle lucene:store="true"&gt;Doggerel&lt;/documentTitle&gt;
098:         *             &lt;body&gt;The quick brown fox jumped over the lazy dog&lt;/body&gt;    
099:         *     &lt;/lucene:document&gt;
100:         *     &lt;lucene:document url="b.html"&gt;
101:         *             &lt;documentTitle lucene:store="true"&gt;Lorem Ipsum&lt;/documentTitle&gt;
102:         *             &lt;body&gt;Lorem ipsum dolor sit amet, consectetuer adipiscing elit.&lt;/body&gt;
103:         *             &lt;body&gt;Nunc a mauris blandit ligula scelerisque tristique.&lt;/body&gt;    
104:         *     &lt;/lucene:document&gt;
105:         * &lt;/lucene:index&gt;
106:         * </pre>
107:         * </dd>
108:         * </dl>
109:         *
110:         * @author <a href="mailto:vgritsenko@apache.org">Vadim Gritsenko</a>
111:         * @author <a href="mailto:conal@nzetc.org">Conal Tuohy</a>
112:         * @version $Id: LuceneIndexTransformer.java 433543 2006-08-22 06:22:54Z crossley $
113:         */
114:        public class LuceneIndexTransformer extends AbstractTransformer
115:                implements  CacheableProcessingComponent, Configurable,
116:                Contextualizable {
117:
118:            public static final String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
119:            public static final String ANALYZER_CLASSNAME_PARAMETER = "analyzer-classname";
120:            public static final String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";
121:            public static final String DIRECTORY_CONFIG = "directory";
122:            public static final String DIRECTORY_PARAMETER = "directory";
123:            public static final String DIRECTORY_DEFAULT = "index";
124:            public static final String MERGE_FACTOR_CONFIG = "merge-factor";
125:            public static final String MERGE_FACTOR_PARAMETER = "merge-factor";
126:            public static final int MERGE_FACTOR_DEFAULT = 20;
127:            public static final String MAX_FIELD_LENGTH_CONFIG = "max-field-length";
128:            public static final String MAX_FIELD_LENGTH_PARAMETER = "max-field-length";
129:            public static final int MAX_FIELD_LENGTH_DEFAULT = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
130:
131:            public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0";
132:            public static final String LUCENE_QUERY_ELEMENT = "index";
133:            public static final String LUCENE_QUERY_ANALYZER_ATTRIBUTE = "analyzer";
134:            public static final String LUCENE_QUERY_DIRECTORY_ATTRIBUTE = "directory";
135:            public static final String LUCENE_QUERY_CREATE_ATTRIBUTE = "create";
136:            public static final String LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE = "merge-factor";
137:            public static final String LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE = "max-field-length";
138:            public static final String LUCENE_DOCUMENT_ELEMENT = "document";
139:            public static final String LUCENE_DOCUMENT_URL_ATTRIBUTE = "url";
140:            public static final String LUCENE_ELEMENT_ATTR_TO_TEXT_ATTRIBUTE = "text-attr";
141:            public static final String LUCENE_ELEMENT_ATTR_STORE_VALUE = "store";
142:            public static final String LUCENE_ELAPSED_TIME_ATTRIBUTE = "elapsed-time";
143:            public static final String CDATA = "CDATA";
144:
145:            // The 3 states of the state machine
146:            private static final int STATE_GROUND = 0; // initial or "ground" state
147:            private static final int STATE_QUERY = 1; // processing a lucene:index (Query) element
148:            private static final int STATE_DOCUMENT = 2; // processing a lucene:document element
149:
150:            // Initialization time variables
151:            protected File workDir = null;
152:
153:            // Declaration time parameters values (specified in sitemap component config)
154:            private IndexerConfiguration configureConfiguration;
155:            // Invocation time parameters values (specified in sitemap transform parameters)
156:            private IndexerConfiguration setupConfiguration;
157:            // Parameters specified in the input document
158:            private IndexerConfiguration queryConfiguration;
159:
160:            // Runtime variables
161:            private int processing;
162:            private boolean createIndex = false;
163:            private IndexWriter writer;
164:            private StringBuffer bodyText;
165:            private Document bodyDocument;
166:            private String bodyDocumentURL;
167:            private Stack elementStack = new Stack();
168:            /**
169:             * Storage for the document element's attributes until the document
170:             * has been indexed, so that they can be copied to the output
171:             * along with a boolean <code>indexed</code> attribute.
172:             */
173:            private AttributesImpl documentAttributes;
174:            private long documentStartTime;
175:
176:            private static String uid(String url) {
177:                return url.replace('/', '\u0000'); // + "\u0000" + DateField.timeToString(urlConnection.getLastModified());
178:            }
179:
180:            /**
181:             * Configure the transformer. The configuration parameters are stored as
182:             * general defaults, which may be over-ridden by parameters specified as
183:             * parameters in the sitemap pipeline, or by attributes of the query
184:             * element(s) in the XML input document.
185:             */
186:            public void configure(Configuration conf)
187:                    throws ConfigurationException {
188:                this .configureConfiguration = new IndexerConfiguration(conf
189:                        .getChild(ANALYZER_CLASSNAME_CONFIG).getValue(
190:                                ANALYZER_CLASSNAME_DEFAULT), conf.getChild(
191:                        DIRECTORY_CONFIG).getValue(DIRECTORY_DEFAULT), conf
192:                        .getChild(MERGE_FACTOR_CONFIG).getValueAsInteger(
193:                                MERGE_FACTOR_DEFAULT), conf.getChild(
194:                        MAX_FIELD_LENGTH_CONFIG).getValueAsInteger(
195:                        MAX_FIELD_LENGTH_DEFAULT));
196:            }
197:
198:            /**
199:             * Setup the transformer.
200:             * Called when the pipeline is assembled.
201:             * The parameters are those specified as child elements of the
202:             * <code>&lt;map:transform&gt;</code> element in the sitemap.
203:             * These parameters are optional: 
204:             * If no parameters are specified here then the defaults are 
205:             * supplied by the component configuration.
206:             * Any parameters specified here may be over-ridden by attributes
207:             * of the lucene:index element in the input document.
208:             */
209:            public void setup(SourceResolver resolver, Map objectModel,
210:                    String src, Parameters parameters)
211:                    throws ProcessingException, SAXException, IOException {
212:                setupConfiguration = new IndexerConfiguration(parameters
213:                        .getParameter(ANALYZER_CLASSNAME_PARAMETER,
214:                                configureConfiguration.analyzerClassname),
215:                        parameters.getParameter(DIRECTORY_PARAMETER,
216:                                configureConfiguration.indexDirectory),
217:                        parameters.getParameterAsInteger(
218:                                MERGE_FACTOR_PARAMETER,
219:                                configureConfiguration.mergeFactor), parameters
220:                                .getParameterAsInteger(
221:                                        MAX_FIELD_LENGTH_PARAMETER,
222:                                        configureConfiguration.maxFieldLength));
223:            }
224:
225:            /**
226:             * Contextualize this class
227:             */
228:            public void contextualize(Context context) throws ContextException {
229:                this .workDir = (File) context.get(Constants.CONTEXT_WORK_DIR);
230:            }
231:
232:            public void recycle() {
233:                this .processing = STATE_GROUND;
234:                if (this .writer != null) {
235:                    try {
236:                        this .writer.close();
237:                    } catch (IOException ioe) {
238:                    }
239:                    this .writer = null;
240:                }
241:                this .bodyText = null;
242:                this .bodyDocument = null;
243:                this .bodyDocumentURL = null;
244:                this .elementStack.clear();
245:                super .recycle();
246:            }
247:
248:            /**
249:             * Generate the unique key.
250:             * This key must be unique inside the space of this component.
251:             *
252:             * @return The generated key
253:             */
254:            public Serializable getKey() {
255:                return "1";
256:            }
257:
258:            /**
259:             * Generate the validity object.
260:             *
261:             * @return The generated validity object or <code>null</code> if the
262:             *         component is currently not cacheable.
263:             */
264:            public SourceValidity getValidity() {
265:                return NOPValidity.SHARED_INSTANCE;
266:            }
267:
268:            public void startDocument() throws SAXException {
269:                super .startDocument();
270:            }
271:
272:            public void endDocument() throws SAXException {
273:                super .endDocument();
274:            }
275:
276:            /**
277:             * Begin the scope of a prefix-URI Namespace mapping.
278:             *
279:             * @param prefix The Namespace prefix being declared.
280:             * @param uri The Namespace URI the prefix is mapped to.
281:             */
282:            public void startPrefixMapping(String prefix, String uri)
283:                    throws SAXException {
284:                if (processing == STATE_GROUND) {
285:                    super .startPrefixMapping(prefix, uri);
286:                }
287:            }
288:
289:            /**
290:             * End the scope of a prefix-URI mapping.
291:             *
292:             * @param prefix The prefix that was being mapping.
293:             */
294:            public void endPrefixMapping(String prefix) throws SAXException {
295:                if (processing == STATE_GROUND) {
296:                    super .endPrefixMapping(prefix);
297:                }
298:            }
299:
300:            public void startElement(String namespaceURI, String localName,
301:                    String qName, Attributes atts) throws SAXException {
302:
303:                if (processing == STATE_GROUND) {
304:                    if (LUCENE_URI.equals(namespaceURI)
305:                            && LUCENE_QUERY_ELEMENT.equals(localName)) {
306:                        String sCreate = atts
307:                                .getValue(LUCENE_QUERY_CREATE_ATTRIBUTE);
308:                        createIndex = BooleanUtils.toBoolean(sCreate);
309:
310:                        String analyzerClassname = atts
311:                                .getValue(LUCENE_QUERY_ANALYZER_ATTRIBUTE);
312:                        String indexDirectory = atts
313:                                .getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
314:                        String mergeFactor = atts
315:                                .getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
316:                        String maxFieldLength = atts
317:                                .getValue(LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE);
318:
319:                        queryConfiguration = new IndexerConfiguration(
320:                                analyzerClassname != null ? analyzerClassname
321:                                        : setupConfiguration.analyzerClassname,
322:                                indexDirectory != null ? indexDirectory
323:                                        : setupConfiguration.indexDirectory,
324:                                mergeFactor != null ? Integer
325:                                        .parseInt(mergeFactor)
326:                                        : setupConfiguration.mergeFactor,
327:                                maxFieldLength != null ? Integer
328:                                        .parseInt(maxFieldLength)
329:                                        : setupConfiguration.maxFieldLength);
330:
331:                        if (!createIndex) {
332:                            // Not asked to create the index - but check if this is necessary anyway:
333:                            try {
334:                                IndexReader reader = openReader();
335:                                reader.close();
336:                            } catch (IOException ioe) {
337:                                // couldn't open the index - so recreate it
338:                                createIndex = true;
339:                            }
340:                        }
341:                        // propagate the lucene:index to the next stage in the pipeline
342:                        super 
343:                                .startElement(namespaceURI, localName, qName,
344:                                        atts);
345:                        processing = STATE_QUERY;
346:                    } else {
347:                        super 
348:                                .startElement(namespaceURI, localName, qName,
349:                                        atts);
350:                    }
351:                } else if (processing == STATE_QUERY) {
352:                    // processing a lucene:index - expecting a lucene:document
353:                    if (LUCENE_URI.equals(namespaceURI)
354:                            && LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
355:                        this .bodyDocumentURL = atts
356:                                .getValue(LUCENE_DOCUMENT_URL_ATTRIBUTE);
357:                        if (this .bodyDocumentURL == null) {
358:                            throw new SAXException(
359:                                    "<lucene:document> must have @url attribute");
360:                        }
361:
362:                        // Remember the time the document indexing began
363:                        this .documentStartTime = System.currentTimeMillis();
364:                        // remember these attributes so they can be passed on to the next stage in the pipeline,
365:                        // when this document element is ended.
366:                        this .documentAttributes = new AttributesImpl(atts);
367:                        this .bodyText = new StringBuffer();
368:                        this .bodyDocument = new Document();
369:                        this .elementStack.clear();
370:                        processing = STATE_DOCUMENT;
371:                    } else {
372:                        throw new SAXException(
373:                                "<lucene:index> element can contain only <lucene:document> elements!");
374:                    }
375:                } else if (processing == STATE_DOCUMENT) {
376:                    elementStack.push(new IndexHelperField(localName,
377:                            new AttributesImpl(atts)));
378:                }
379:            }
380:
381:            public void endElement(String namespaceURI, String localName,
382:                    String qName) throws SAXException {
383:
384:                if (processing == STATE_QUERY) {
385:                    if (LUCENE_URI.equals(namespaceURI)
386:                            && LUCENE_QUERY_ELEMENT.equals(localName)) {
387:                        // End query processing
388:                        try {
389:                            if (this .writer == null) {
390:                                openWriter();
391:                            }
392:                            this .writer.optimize();
393:                            this .writer.close();
394:                            this .writer = null;
395:                        } catch (IOException e) {
396:                            throw new SAXException(e);
397:                        }
398:                        // propagate the query element to the next stage in the pipeline
399:                        super .endElement(namespaceURI, localName, qName);
400:                        this .processing = STATE_GROUND;
401:                    } else {
402:                        throw new SAXException("</lucene:index> was expected!");
403:                    }
404:                } else if (processing == STATE_DOCUMENT) {
405:                    if (LUCENE_URI.equals(namespaceURI)
406:                            && LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
407:                        // End document processing
408:                        this .bodyDocument.add(Field.UnStored(
409:                                LuceneXMLIndexer.BODY_FIELD, this .bodyText
410:                                        .toString()));
411:                        this .bodyText = null;
412:
413:                        this .bodyDocument.add(Field.UnIndexed(
414:                                LuceneXMLIndexer.URL_FIELD,
415:                                this .bodyDocumentURL));
416:                        // store: false, index: true, tokenize: false
417:                        this .bodyDocument.add(new Field(
418:                                LuceneXMLIndexer.UID_FIELD,
419:                                uid(this .bodyDocumentURL), false, true, false));
420:                        try {
421:                            reindexDocument();
422:                        } catch (IOException e) {
423:                            throw new SAXException(e);
424:                        }
425:                        this .bodyDocumentURL = null;
426:
427:                        // propagate the lucene:document element to the next stage in the pipeline
428:                        long elapsedTime = System.currentTimeMillis()
429:                                - this .documentStartTime;
430:                        //documentAttributes = new AttributesImpl();
431:                        this .documentAttributes.addAttribute("",
432:                                LUCENE_ELAPSED_TIME_ATTRIBUTE,
433:                                LUCENE_ELAPSED_TIME_ATTRIBUTE, CDATA, String
434:                                        .valueOf(elapsedTime));
435:                        super .startElement(namespaceURI, localName, qName,
436:                                this .documentAttributes);
437:                        super .endElement(namespaceURI, localName, qName);
438:                        this .processing = STATE_QUERY;
439:                    } else {
440:                        // End element processing
441:                        IndexHelperField tos = (IndexHelperField) elementStack
442:                                .pop();
443:                        StringBuffer text = tos.getText();
444:
445:                        Attributes atts = tos.getAttributes();
446:                        boolean attributesToText = atts.getIndex(LUCENE_URI,
447:                                LUCENE_ELEMENT_ATTR_TO_TEXT_ATTRIBUTE) != -1;
448:                        for (int i = 0; i < atts.getLength(); i++) {
449:                            // Ignore Lucene attributes
450:                            if (LUCENE_URI.equals(atts.getURI(i))) {
451:                                continue;
452:                            }
453:
454:                            String atts_lname = atts.getLocalName(i);
455:                            String atts_value = atts.getValue(i);
456:                            bodyDocument.add(Field.UnStored(localName + "@"
457:                                    + atts_lname, atts_value));
458:                            if (attributesToText) {
459:                                text.append(atts_value);
460:                                text.append(' ');
461:                                bodyText.append(atts_value);
462:                                bodyText.append(' ');
463:                            }
464:                        }
465:
466:                        boolean store = atts.getIndex(LUCENE_URI,
467:                                LUCENE_ELEMENT_ATTR_STORE_VALUE) != -1;
468:                        if (text != null && text.length() > 0) {
469:                            if (store) {
470:                                bodyDocument.add(Field.Text(localName, text
471:                                        .toString()));
472:                            } else {
473:                                bodyDocument.add(Field.UnStored(localName, text
474:                                        .toString()));
475:                            }
476:                        }
477:                    }
478:                } else {
479:                    // All other tags
480:                    super .endElement(namespaceURI, localName, qName);
481:                }
482:            }
483:
484:            public void characters(char[] ch, int start, int length)
485:                    throws SAXException {
486:
487:                if (processing == STATE_DOCUMENT && ch.length > 0 && start >= 0
488:                        && length > 1 && elementStack.size() > 0) {
489:                    String text = new String(ch, start, length);
490:                    ((IndexHelperField) elementStack.peek()).append(text);
491:                    bodyText.append(text);
492:                    bodyText.append(' ');
493:                } else if (processing == STATE_GROUND) {
494:                    super .characters(ch, start, length);
495:                }
496:            }
497:
498:            private void openWriter() throws IOException {
499:                File indexDirectory = new File(
500:                        queryConfiguration.indexDirectory);
501:                if (!indexDirectory.isAbsolute()) {
502:                    indexDirectory = new File(workDir,
503:                            queryConfiguration.indexDirectory);
504:                }
505:
506:                // If the index directory doesn't exist, then always create it.
507:                boolean indexExists = IndexReader.indexExists(indexDirectory);
508:                if (!indexExists) {
509:                    createIndex = true;
510:                }
511:
512:                // Get the index directory, creating it if necessary
513:                Directory directory = LuceneCocoonHelper.getDirectory(
514:                        indexDirectory, createIndex);
515:                Analyzer analyzer = LuceneCocoonHelper
516:                        .getAnalyzer(queryConfiguration.analyzerClassname);
517:                this .writer = new IndexWriter(directory, analyzer, createIndex);
518:                this .writer.mergeFactor = queryConfiguration.mergeFactor;
519:                this .writer.maxFieldLength = queryConfiguration.maxFieldLength;
520:            }
521:
522:            private IndexReader openReader() throws IOException {
523:                File indexDirectory = new File(
524:                        queryConfiguration.indexDirectory);
525:                if (!indexDirectory.isAbsolute()) {
526:                    indexDirectory = new File(workDir,
527:                            queryConfiguration.indexDirectory);
528:                }
529:                Directory directory = LuceneCocoonHelper.getDirectory(
530:                        indexDirectory, createIndex);
531:                IndexReader reader = IndexReader.open(directory);
532:                return reader;
533:            }
534:
535:            private void reindexDocument() throws IOException {
536:                if (this .createIndex) {
537:                    // The index is being created, so there's no need to delete the doc from an existing index.
538:                    // This means we can keep a single IndexWriter open throughout the process.
539:                    if (this .writer == null) {
540:                        openWriter();
541:                    }
542:                    this .writer.addDocument(this .bodyDocument);
543:                } else {
544:                    // This is an incremental reindex, so the document should be removed from the index before adding it
545:                    try {
546:                        IndexReader reader = openReader();
547:                        reader.delete(new Term(LuceneXMLIndexer.UID_FIELD,
548:                                uid(this .bodyDocumentURL)));
549:                        reader.close();
550:                    } catch (IOException e) { /* ignore */
551:                    }
552:                    openWriter();
553:                    this .writer.addDocument(this .bodyDocument);
554:                    this .writer.close();
555:                    this .writer = null;
556:                }
557:                this .bodyDocument = null;
558:            }
559:
560:            static class IndexHelperField {
561:                String localName;
562:                StringBuffer text;
563:                Attributes attributes;
564:
565:                IndexHelperField(String localName, Attributes atts) {
566:                    this .localName = localName;
567:                    this .attributes = atts;
568:                    this .text = new StringBuffer();
569:                }
570:
571:                public Attributes getAttributes() {
572:                    return attributes;
573:                }
574:
575:                public StringBuffer getText() {
576:                    return text;
577:                }
578:
579:                public void append(String text) {
580:                    this .text.append(text);
581:                }
582:
583:                public void append(char[] str, int offset, int length) {
584:                    this .text.append(str, offset, length);
585:                }
586:            }
587:
588:            static class IndexerConfiguration {
589:                String analyzerClassname;
590:                String indexDirectory;
591:                int mergeFactor;
592:                int maxFieldLength;
593:
594:                public IndexerConfiguration(String analyzerClassname,
595:                        String indexDirectory, int mergeFactor,
596:                        int maxFieldLength) {
597:                    this.analyzerClassname = analyzerClassname;
598:                    this.indexDirectory = indexDirectory;
599:                    this.mergeFactor = mergeFactor;
600:                    this.maxFieldLength = maxFieldLength;
601:                }
602:            }
603:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.