01: /* ====================================================================
02: Licensed to the Apache Software Foundation (ASF) under one or more
03: contributor license agreements. See the NOTICE file distributed with
04: this work for additional information regarding copyright ownership.
05: The ASF licenses this file to You under the Apache License, Version 2.0
06: (the "License"); you may not use this file except in compliance with
07: the License. You may obtain a copy of the License at
08:
09: http://www.apache.org/licenses/LICENSE-2.0
10:
11: Unless required by applicable law or agreed to in writing, software
12: distributed under the License is distributed on an "AS IS" BASIS,
13: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14: See the License for the specific language governing permissions and
15: limitations under the License.
16: ==================================================================== */
17:
18: package org.apache.poi.hslf.extractor;
19:
20: import junit.framework.TestCase;
21: import java.util.Vector;
22:
23: /**
24: * Tests that the QuickButCruddyTextExtractor works correctly
25: *
26: * @author Nick Burch (nick at torchbox dot com)
27: */
28: public class TestCruddyExtractor extends TestCase {
29: // Extractor primed on the test data
30: private QuickButCruddyTextExtractor te;
31: // All the text to be found in the file
32: String[] allTheText = new String[] {
33: "This is a test title",
34: "This is a test subtitle\nThis is on page 1",
35: "Click to edit Master title style",
36: "Click to edit Master text styles\nSecond level\nThird level\nFourth level\nFifth level",
37: "*",
38: "*",
39: "*",
40: "*",
41: "*",
42: "Click to edit Master text styles\nSecond level\nThird level\nFourth level\nFifth level",
43: "*",
44: "*",
45: "These are the notes for page 1",
46: "This is a test title",
47: "This is a test subtitle\nThis is on page 1",
48: "This is the title on page 2",
49: "This is page two\nIt has several blocks of text\nNone of them have formattingT",
50: "These are the notes on page two, again lacking formatting",
51: "This is a test title",
52: "This is a test subtitle\nThis is on page 1",
53: "This is the title on page 2",
54: "This is page two\nIt has several blocks of text\nNone of them have formatting", };
55:
56: public TestCruddyExtractor() throws Exception {
57: String dirname = System.getProperty("HSLF.testdata.path");
58: String filename = dirname + "/basic_test_ppt_file.ppt";
59: te = new QuickButCruddyTextExtractor(filename);
60: }
61:
62: public void testReadAsVector() throws Exception {
63: // Extract the text from the file as a vector
64: Vector foundTextV = te.getTextAsVector();
65:
66: // Ensure they match
67: assertEquals(allTheText.length, foundTextV.size());
68: for (int i = 0; i < allTheText.length; i++) {
69: String foundText = (String) foundTextV.get(i);
70: assertEquals(allTheText[i], foundText);
71: }
72: }
73:
74: public void testReadAsString() throws Exception {
75: // Extract the text as a String
76: String foundText = te.getTextAsString();
77:
78: // Turn the string array into a single string
79: StringBuffer expectTextSB = new StringBuffer();
80: for (int i = 0; i < allTheText.length; i++) {
81: expectTextSB.append(allTheText[i]);
82: expectTextSB.append('\n');
83: }
84: String expectText = expectTextSB.toString();
85:
86: // Ensure they match
87: assertEquals(expectText, foundText);
88: }
89: }
|