001: /**********************************************************************************
002: * $URL: https://source.sakaiproject.org/svn/search/tags/sakai_2-4-1/search-impl/impl/src/java/org/sakaiproject/search/component/adapter/contenthosting/DefaultFullContentDigester.java $
003: * $Id: DefaultFullContentDigester.java 21387 2007-02-11 19:37:04Z ian@caret.cam.ac.uk $
004: ***********************************************************************************
005: *
006: * Copyright (c) 2003, 2004, 2005, 2006 The Sakai Foundation.
007: *
008: * Licensed under the Educational Community License, Version 1.0 (the "License");
009: * you may not use this file except in compliance with the License.
010: * You may obtain a copy of the License at
011: *
012: * http://www.opensource.org/licenses/ecl1.php
013: *
014: * Unless required by applicable law or agreed to in writing, software
015: * distributed under the License is distributed on an "AS IS" BASIS,
016: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: * See the License for the specific language governing permissions and
018: * limitations under the License.
019: *
020: **********************************************************************************/package org.sakaiproject.search.component.adapter.contenthosting;
021:
022: import java.io.FilterReader;
023: import java.io.IOException;
024: import java.io.InputStream;
025: import java.io.InputStreamReader;
026: import java.io.Reader;
027:
028: import org.apache.commons.logging.Log;
029: import org.apache.commons.logging.LogFactory;
030: import org.sakaiproject.content.api.ContentResource;
031: import org.sakaiproject.search.api.SearchUtils;
032:
033: /**
034: * @author ieb
035: */
036: public class DefaultFullContentDigester extends DefaultContentDigester {
037: private static final Log log = LogFactory
038: .getLog(DefaultFullContentDigester.class);
039:
040: private int maxDigestSize = 1024 * 1024 * 5; // 5M
041:
042: public String getContent(ContentResource contentResource) {
043: if (contentResource != null && !isBinary(contentResource)
044: && contentResource.getContentLength() > maxDigestSize) {
045: throw new RuntimeException(
046: "Attempt to get too much content as a string on "
047: + contentResource.getReference());
048: }
049: try {
050: return SearchUtils.appendCleanString(
051: new String(contentResource.getContent(), "UTF-8"),
052: null).toString();
053: } catch (Exception e) {
054: throw new RuntimeException("Failed to get content", e);
055: }
056: }
057:
058: public Reader getContentReader(ContentResource contentResource) {
059: InputStream contentStream = null;
060: // we dont close this as its used to stream,
061: // the caller MUST close the stream
062: try {
063: contentStream = contentResource.streamContent();
064: FilterStreamReader filterReader = new FilterStreamReader(
065: contentStream, maxDigestSize);
066: return filterReader;
067: } catch (Exception e) {
068: throw new RuntimeException("Failed to stream content ", e);
069: }
070:
071: }
072:
073: public boolean accept(String mimeType) {
074: return true;
075: }
076:
077: public class FilterStreamReader extends FilterReader {
078:
079: private InputStream inputStream = null;
080: private int maxDigestSize;
081: private int nread = 0;
082:
083: /*
084: * (non-Javadoc)
085: *
086: * @see java.io.FilterReader#read()
087: */
088: public int read() throws IOException {
089: if (nread > maxDigestSize) {
090: return -1;
091: }
092: char i = (char) super .read();
093: nread++;
094: if (Character.isLetterOrDigit(i))
095: return i;
096: return ' ';
097: }
098:
099: /*
100: * (non-Javadoc)
101: *
102: * @see java.io.FilterReader#read(char[], int, int)
103: */
104: public int read(char[] buffer, int start, int end)
105: throws IOException {
106: if (nread > maxDigestSize) {
107: return -1;
108: }
109: int size = super .read(buffer, start, end);
110: nread += size;
111: int last = start + size;
112: for (int i = size; i < last; i++) {
113: if (!Character.isLetterOrDigit(buffer[i])) {
114: buffer[i] = ' ';
115: }
116: }
117: return size;
118: }
119:
120: protected FilterStreamReader(Reader arg0) {
121: super (arg0);
122: }
123:
124: public FilterStreamReader(InputStream stream, int maxDigestSize) {
125: super (new InputStreamReader(stream));
126: inputStream = stream;
127: this .maxDigestSize = maxDigestSize;
128: }
129:
130: /* (non-Javadoc)
131: * @see java.io.FilterReader#close()
132: */
133: public void close() throws IOException {
134: super .close();
135: try {
136: this .in.close();
137: } catch (Exception ex) {
138: }
139: try {
140: inputStream.close();
141: } catch (Exception ex) {
142: }
143: inputStream = null;
144: }
145:
146: }
147:
148: /**
149: * @return Returns the maxDigestSize.
150: */
151: public int getMaxDigestSize() {
152: return maxDigestSize;
153: }
154:
155: /**
156: * @param maxDigestSize The maxDigestSize to set.
157: */
158: public void setMaxDigestSize(int maxDigestSize) {
159: this.maxDigestSize = maxDigestSize;
160: }
161:
162: }
|