01: /*
02: * regain - A file search engine providing plenty of formats
03: * Copyright (C) 2004 Til Schneider
04: *
05: * This library is free software; you can redistribute it and/or
06: * modify it under the terms of the GNU Lesser General Public
07: * License as published by the Free Software Foundation; either
08: * version 2.1 of the License, or (at your option) any later version.
09: *
10: * This library is distributed in the hope that it will be useful,
11: * but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13: * Lesser General Public License for more details.
14: *
15: * You should have received a copy of the GNU Lesser General Public
16: * License along with this library; if not, write to the Free Software
17: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18: *
19: * Contact: Til Schneider, info@murfman.de
20: *
21: * CVS information:
22: * $RCSfile$
23: * $Source$
24: * $Date: 2005-11-21 11:20:09 +0100 (Mo, 21 Nov 2005) $
25: * $Author: til132 $
26: * $Revision: 180 $
27: */
28: package net.sf.regain.crawler.preparator;
29:
30: import net.sf.regain.RegainException;
31: import net.sf.regain.crawler.document.AbstractPreparator;
32: import net.sf.regain.crawler.document.RawDocument;
33:
34: /**
35: * Präpariert ein Plain-Text-Dokument für die Indizierung.
36: * <p>
37: * Das Dokument wird dabei unverändert übernommen befreit.
38: *
39: * @author Til Schneider, www.murfman.de
40: */
41: public class PlainTextPreparator extends AbstractPreparator {
42:
43: /**
44: * Creates a new instance of PlainTextPreparator.
45: *
46: * @throws RegainException If creating the preparator failed.
47: */
48: public PlainTextPreparator() throws RegainException {
49: super ("txt");
50: }
51:
52: /**
53: * Präpariert ein Dokument für die Indizierung.
54: *
55: * @param rawDocument Das zu präpariernde Dokument.
56: *
57: * @throws RegainException Wenn die Präparation fehl schlug.
58: */
59: public void prepare(RawDocument rawDocument) throws RegainException {
60: setCleanedContent(rawDocument.getContentAsString());
61: }
62:
63: }
|