01: /**********************************************************************************
02: * $URL: https://source.sakaiproject.org/svn/search/tags/sakai_2-4-1/search-impl/impl/src/java/org/sakaiproject/search/component/adapter/contenthosting/PPTContentDigester.java $
03: * $Id: PPTContentDigester.java 21946 2007-02-27 11:41:58Z ian@caret.cam.ac.uk $
04: ***********************************************************************************
05: *
06: * Copyright (c) 2003, 2004, 2005, 2006 The Sakai Foundation.
07: *
08: * Licensed under the Educational Community License, Version 1.0 (the "License");
09: * you may not use this file except in compliance with the License.
10: * You may obtain a copy of the License at
11: *
12: * http://www.opensource.org/licenses/ecl1.php
13: *
14: * Unless required by applicable law or agreed to in writing, software
15: * distributed under the License is distributed on an "AS IS" BASIS,
16: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17: * See the License for the specific language governing permissions and
18: * limitations under the License.
19: *
20: **********************************************************************************/package org.sakaiproject.search.component.adapter.contenthosting;
21:
22: import java.io.IOException;
23: import java.io.InputStream;
24: import java.io.Reader;
25: import java.io.StringReader;
26:
27: import org.apache.commons.logging.Log;
28: import org.apache.commons.logging.LogFactory;
29: import org.apache.poi.hslf.extractor.PowerPointExtractor;
30: import org.sakaiproject.content.api.ContentResource;
31: import org.sakaiproject.search.api.SearchUtils;
32:
33: /**
34: * @author ieb
35: */
36: public class PPTContentDigester extends BaseContentDigester {
37: private static Log log = LogFactory
38: .getLog(PPTContentDigester.class);
39:
40: static {
41: System.setProperty("org.apache.poi.util.POILogger",
42: "org.apache.poi.util.NullLogger");
43: }
44:
45: /*
46: * (non-Javadoc)
47: *
48: * @see org.sakaiproject.search.component.adapter.contenthosting.BaseContentDigester#getContent(org.sakaiproject.content.api.ContentResource)
49: */
50:
51: public String getContent(ContentResource contentResource) {
52: if (contentResource != null
53: && contentResource.getContentLength() > maxDigestSize) {
54: throw new RuntimeException(
55: "Attempt to get too much content as a string on "
56: + contentResource.getReference());
57: }
58: InputStream contentStream = null;
59:
60: try {
61: contentStream = contentResource.streamContent();
62: PowerPointExtractor pptExtractor = new PowerPointExtractor(
63: contentStream);
64: StringBuilder sb = new StringBuilder();
65: SearchUtils.appendCleanString(pptExtractor.getText(), sb);
66: sb.append(" ");
67: SearchUtils.appendCleanString(pptExtractor.getNotes(), sb);
68: return sb.toString();
69: } catch (Exception e) {
70: throw new RuntimeException(
71: "Failed to read content for indexing ", e);
72: } finally {
73: if (contentStream != null) {
74: try {
75: contentStream.close();
76: } catch (IOException e) {
77: }
78: }
79: }
80: }
81:
82: /*
83: * (non-Javadoc)
84: *
85: * @see org.sakaiproject.search.component.adapter.contenthosting.BaseContentDigester#getContentReader(org.sakaiproject.content.api.ContentResource)
86: */
87:
88: public Reader getContentReader(ContentResource contentResource) {
89: return new StringReader(getContent(contentResource));
90: }
91:
92: }
|