001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hslf.extractor;
019:
020: import junit.framework.TestCase;
021:
022: /**
023: * Tests that the extractor correctly gets the text out of our sample file
024: *
025: * @author Nick Burch (nick at torchbox dot com)
026: */
027: public class TextExtractor extends TestCase {
028: /** Extractor primed on the 2 page basic test data */
029: private PowerPointExtractor ppe;
030: /** Extractor primed on the 1 page but text-box'd test data */
031: private PowerPointExtractor ppe2;
032: /** Where to go looking for our test files */
033: private String dirname;
034:
035: public TextExtractor() throws Exception {
036: dirname = System.getProperty("HSLF.testdata.path");
037: String filename = dirname + "/basic_test_ppt_file.ppt";
038: ppe = new PowerPointExtractor(filename);
039: String filename2 = dirname + "/with_textbox.ppt";
040: ppe2 = new PowerPointExtractor(filename2);
041: }
042:
043: public void testReadSheetText() throws Exception {
044: // Basic 2 page example
045: String sheetText = ppe.getText();
046: String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n";
047:
048: ensureTwoStringsTheSame(expectText, sheetText);
049:
050: // 1 page example with text boxes
051: sheetText = ppe2.getText();
052: expectText = "Hello, World!!!\nI am just a poor boy\nThis is Times New Roman\nPlain Text \n";
053:
054: ensureTwoStringsTheSame(expectText, sheetText);
055: }
056:
057: public void testReadNoteText() throws Exception {
058: // Basic 2 page example
059: String notesText = ppe.getNotes();
060: String expectText = "These are the notes for page 1\nThese are the notes on page two, again lacking formatting\n";
061:
062: ensureTwoStringsTheSame(expectText, notesText);
063:
064: // Other one doesn't have notes
065: notesText = ppe2.getNotes();
066: expectText = "";
067:
068: ensureTwoStringsTheSame(expectText, notesText);
069: }
070:
071: /**
072: * Test that when presented with a PPT file missing the odd
073: * core record, we can still get the rest of the text out
074: * @throws Exception
075: */
076: public void testMissingCoreRecords() throws Exception {
077: String filename = dirname + "/missing_core_records.ppt";
078: ppe = new PowerPointExtractor(filename);
079:
080: String text = ppe.getText(true, false);
081: String nText = ppe.getNotes();
082:
083: assertNotNull(text);
084: assertNotNull(nText);
085:
086: // Notes record were corrupt, so don't expect any
087: assertEquals(nText.length(), 0);
088:
089: // Slide records were fine
090: assertTrue(text
091: .startsWith("Using Disease Surveillance and Response"));
092: }
093:
094: private void ensureTwoStringsTheSame(String exp, String act)
095: throws Exception {
096: assertEquals(exp.length(), act.length());
097: char[] expC = exp.toCharArray();
098: char[] actC = act.toCharArray();
099: for (int i = 0; i < expC.length; i++) {
100: System.out.println(i + "\t" + expC[i] + " " + actC[i]);
101: assertEquals(expC[i], actC[i]);
102: }
103: assertEquals(exp, act);
104: }
105: }
|