01: /* ====================================================================
02: Licensed to the Apache Software Foundation (ASF) under one or more
03: contributor license agreements. See the NOTICE file distributed with
04: this work for additional information regarding copyright ownership.
05: The ASF licenses this file to You under the Apache License, Version 2.0
06: (the "License"); you may not use this file except in compliance with
07: the License. You may obtain a copy of the License at
08:
09: http://www.apache.org/licenses/LICENSE-2.0
10:
11: Unless required by applicable law or agreed to in writing, software
12: distributed under the License is distributed on an "AS IS" BASIS,
13: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14: See the License for the specific language governing permissions and
15: limitations under the License.
16: ==================================================================== */
17: package org.apache.poi;
18:
19: /**
20: * Common Parent for Text Extractors
21: * of POI Documents.
22: * You will typically find the implementation of
23: * a given format's text extractor under
24: * org.apache.poi.[format].extractor .
25: * @see org.apache.poi.hssf.extractor.ExcelExtractor
26: * @see org.apache.poi.hslf.extractor.PowerPointExtractor
27: * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
28: * @see org.apache.poi.hwpf.extractor.WordExtractor
29: */
30: public abstract class POITextExtractor {
31: /** The POIDocument that's open */
32: protected POIDocument document;
33:
34: /**
35: * Creates a new text extractor for the given document
36: */
37: public POITextExtractor(POIDocument document) {
38: this .document = document;
39: }
40:
41: /**
42: * Retrieves all the text from the document.
43: * How cells, paragraphs etc are separated in the text
44: * is implementation specific - see the javadocs for
45: * a specific project for details.
46: * @return All the text from the document
47: */
48: public abstract String getText();
49: }
|