001: /**********************************************************************************
002: * $URL: https://source.sakaiproject.org/svn/search/tags/sakai_2-4-1/search-impl/impl/src/java/org/sakaiproject/search/component/adapter/contenthosting/XLContentDigester.java $
003: * $Id: XLContentDigester.java 21946 2007-02-27 11:41:58Z ian@caret.cam.ac.uk $
004: ***********************************************************************************
005: *
006: * Copyright (c) 2003, 2004, 2005, 2006 The Sakai Foundation.
007: *
008: * Licensed under the Educational Community License, Version 1.0 (the "License");
009: * you may not use this file except in compliance with the License.
010: * You may obtain a copy of the License at
011: *
012: * http://www.opensource.org/licenses/ecl1.php
013: *
014: * Unless required by applicable law or agreed to in writing, software
015: * distributed under the License is distributed on an "AS IS" BASIS,
016: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: * See the License for the specific language governing permissions and
018: * limitations under the License.
019: *
020: **********************************************************************************/package org.sakaiproject.search.component.adapter.contenthosting;
021:
022: import java.io.IOException;
023: import java.io.InputStream;
024: import java.io.Reader;
025: import java.io.StringReader;
026:
027: import org.apache.commons.logging.Log;
028: import org.apache.commons.logging.LogFactory;
029: import org.apache.poi.hssf.usermodel.HSSFCell;
030: import org.apache.poi.hssf.usermodel.HSSFRichTextString;
031: import org.apache.poi.hssf.usermodel.HSSFRow;
032: import org.apache.poi.hssf.usermodel.HSSFSheet;
033: import org.apache.poi.hssf.usermodel.HSSFWorkbook;
034: import org.sakaiproject.content.api.ContentResource;
035: import org.sakaiproject.search.api.SearchUtils;
036:
037: /**
038: * @author ieb
039: */
040: public class XLContentDigester extends BaseContentDigester {
041: private static Log log = LogFactory.getLog(XLContentDigester.class);
042:
043: static {
044: System.setProperty("org.apache.poi.util.POILogger",
045: "org.apache.poi.util.NullLogger");
046: }
047:
048: /*
049: * (non-Javadoc)
050: *
051: * @see org.sakaiproject.search.component.adapter.contenthosting.BaseContentDigester#getContent(org.sakaiproject.content.api.ContentResource)
052: */
053:
054: public String getContent(ContentResource contentResource) {
055: if (contentResource != null
056: && contentResource.getContentLength() > maxDigestSize) {
057: throw new RuntimeException(
058: "Attempt to get too much content as a string on "
059: + contentResource.getReference());
060: }
061: InputStream contentStream = null;
062: try {
063: contentStream = contentResource.streamContent();
064: HSSFWorkbook workbook = new HSSFWorkbook(contentStream);
065: StringBuilder sb = new StringBuilder();
066: int nsheets = workbook.getNumberOfSheets();
067:
068: for (int i = 0; i < nsheets; i++) {
069: HSSFSheet sheet = workbook.getSheetAt(i);
070: int r = sheet.getFirstRowNum();
071: int lr = sheet.getLastRowNum();
072: for (; r <= lr; r++) {
073: HSSFRow row = sheet.getRow(r);
074: short c = row.getFirstCellNum();
075: short lc = row.getLastCellNum();
076: for (; c <= lc; c++) {
077: HSSFCell cell = row.getCell(c);
078: HSSFRichTextString cstr = cell
079: .getRichStringCellValue();
080: SearchUtils.appendCleanString(cstr.getString(),
081: sb);
082: sb.append(" ");
083: }
084: }
085: }
086: return sb.toString();
087: } catch (Exception e) {
088: throw new RuntimeException(
089: "Failed to read content for indexing ", e);
090: } finally {
091: if (contentStream != null) {
092: try {
093: contentStream.close();
094: } catch (IOException e) {
095: }
096: }
097: }
098:
099: }
100:
101: /*
102: * (non-Javadoc)
103: *
104: * @see org.sakaiproject.search.component.adapter.contenthosting.BaseContentDigester#getContentReader(org.sakaiproject.content.api.ContentResource)
105: */
106:
107: public Reader getContentReader(ContentResource contentResource) {
108: return new StringReader(getContent(contentResource));
109: }
110:
111: }
|