001: /**********************************************************************************
002: * $URL:https://source.sakaiproject.org/svn/osp/trunk/presentation/api-impl/src/java/org/theospi/portfolio/presentation/export/PresentationExport.java $
003: * $Id:PresentationExport.java 9134 2006-05-08 20:28:42Z chmaurer@iupui.edu $
004: ***********************************************************************************
005: *
006: * Copyright (c) 2005, 2006 The Sakai Foundation.
007: *
008: * Licensed under the Educational Community License, Version 1.0 (the "License");
009: * you may not use this file except in compliance with the License.
010: * You may obtain a copy of the License at
011: *
012: * http://www.opensource.org/licenses/ecl1.php
013: *
014: * Unless required by applicable law or agreed to in writing, software
015: * distributed under the License is distributed on an "AS IS" BASIS,
016: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: * See the License for the specific language governing permissions and
018: * limitations under the License.
019: *
020: **********************************************************************************/package org.theospi.portfolio.presentation.export;
021:
022: import java.io.BufferedInputStream;
023: import java.io.BufferedOutputStream;
024: import java.io.File;
025: import java.io.FileFilter;
026: import java.io.FileInputStream;
027: import java.io.IOException;
028: import java.io.InputStream;
029: import java.io.OutputStream;
030: import java.net.URL;
031: import java.util.ArrayList;
032: import java.util.Iterator;
033: import java.util.StringTokenizer;
034: import java.util.zip.Adler32;
035: import java.util.zip.CheckedOutputStream;
036: import java.util.zip.ZipEntry;
037: import java.util.zip.ZipOutputStream;
038:
039: import org.apache.commons.logging.Log;
040: import org.apache.commons.logging.LogFactory;
041:
042: import websphinx.Access;
043: import websphinx.Crawler;
044: import websphinx.DownloadParameters;
045: import websphinx.Link;
046: import websphinx.LinkEvent;
047: import websphinx.LinkListener;
048: import websphinx.Page;
049:
050: public class PresentationExport extends Crawler implements LinkListener {
051: protected final transient Log logger = LogFactory
052: .getLog(getClass());
053:
054: private PortfolioMirror mirror = null;
055: private String hostName = null;
056: private String webappName = null;
057: private String tempDirectory = null;
058: public static final int BUFFER = 1024 * 10;
059: private ArrayList errorLinks = new ArrayList();
060: private static SessionAccess access = new SessionAccess();
061:
062: public PresentationExport(String url, String tempDirectory)
063: throws IOException {
064: this .tempDirectory = tempDirectory;
065:
066: Access.setAccess(access);
067:
068: URL urlObj = new URL(url);
069: this .hostName = urlObj.getHost();
070: String path = urlObj.getPath();
071:
072: StringTokenizer tok = new StringTokenizer(path, "/", false);
073:
074: webappName = tok.nextToken();
075: if (!tok.hasMoreTokens()) {
076: webappName = "";
077: } else {
078: webappName = "/" + webappName;
079: }
080:
081: mirror = new PortfolioMirror(tempDirectory, webappName);
082:
083: this .setRootHrefs(url);
084: this .setLinkType(Crawler.ALL_LINKS);
085: this .setSynchronous(true);
086: this .setDomain(Crawler.WEB);
087: this .addLinkListener(this );
088:
089: DownloadParameters dp = getDownloadParameters();
090: dp = dp.changeMaxThreads(1);
091: setDownloadParameters(dp.changeMaxPageSize(2000));
092: }
093:
094: public void createZip(OutputStream out) throws IOException {
095: File directory = new File(tempDirectory + webappName);
096:
097: CheckedOutputStream checksum = new CheckedOutputStream(out,
098: new Adler32());
099: ZipOutputStream zos = new ZipOutputStream(
100: new BufferedOutputStream(checksum));
101:
102: recurseDirectory("", directory, zos);
103:
104: zos.finish();
105: zos.flush();
106: }
107:
108: /**
109: * places a directory into the zip stream
110: * @param parentPath
111: * @param directory
112: * @param zos
113: * @throws IOException
114: */
115: protected void recurseDirectory(String parentPath, File directory,
116: ZipOutputStream zos) throws IOException {
117: // get all files... go through those
118: File[] files = directory.listFiles(new DirectoryFileFilter(
119: false));
120:
121: if (files == null)
122: throw new NullPointerException(
123: "recursing through a directory which is not a directory: "
124: + parentPath + " ---- " + directory);
125:
126: addFiles(zos, parentPath, files);
127:
128: // get all directories... go through those...
129: File[] directories = directory
130: .listFiles(new DirectoryFileFilter(true));
131: for (int i = 0; i < directories.length; i++) {
132: recurseDirectory(parentPath + directories[i].getName()
133: + "/", directories[i], zos);
134: }
135:
136: }
137:
138: protected void addFiles(ZipOutputStream out, String parentPrefix,
139: File[] files) throws IOException {
140:
141: BufferedInputStream origin = null;
142:
143: byte data[] = new byte[BUFFER];
144: for (int i = 0; i < files.length; i++) {
145: String fileName = parentPrefix + files[i].getName();
146: logger.debug("Adding " + fileName);
147: InputStream in = new FileInputStream(files[i]);
148:
149: if (in == null)
150: throw new NullPointerException();
151:
152: origin = new BufferedInputStream(in, BUFFER);
153:
154: if (fileName == null)
155: throw new NullPointerException();
156:
157: ZipEntry entry = new ZipEntry(fileName);
158: out.putNextEntry(entry);
159: int count;
160: while ((count = origin.read(data, 0, BUFFER)) != -1) {
161: out.write(data, 0, count);
162: }
163: out.closeEntry();
164: in.close();
165: }
166: }
167:
168: /**
169: * Start crawling. Returns either when the crawl is done, or
170: * when pause() or stop() is called. Because this method implements the
171: * java.lang.Runnable interface, a crawler can be run in the
172: * background thread.
173: */
174: public void run() {
175: super .run();
176:
177: // process error links
178: for (Iterator i = errorLinks.iterator(); i.hasNext();) {
179: Link link = (Link) i.next();
180: visit(link.getPage());
181: }
182: }
183:
184: public synchronized void visit(Page page) {
185:
186: try {
187: mirror.writePage(page);
188: mirror.rewrite();
189: } catch (IOException e) {
190: logger
191: .info(
192: "Error visiting link. Most likely broken link.",
193: e);
194: }
195:
196: logger.debug("visiting page");
197: super .visit(page);
198: }
199:
200: public synchronized boolean shouldVisit(Link link) {
201: if (link.getMethod() == Link.POST) {
202: return false;
203: }
204:
205: if (!link.getHost().equalsIgnoreCase(hostName)) {
206: return false;
207: }
208:
209: // todo maybe if (link.getURL().getFile().startsWith(webappName + "/showPublicPortfolio.do")) {
210: // return false;
211: //}
212:
213: return true;
214: }
215:
216: public void deleteTemp() {
217: File temp = new File(tempDirectory);
218:
219: deleteContent(temp);
220: temp.delete();
221: }
222:
223: protected void deleteContent(File directory) {
224: File[] files = directory.listFiles(new DirectoryFileFilter(
225: false));
226:
227: if (files != null) {
228: for (int i = 0; i < files.length; i++) {
229: files[i].delete();
230: }
231: }
232:
233: // get all directories... go through those...
234: File[] directories = directory
235: .listFiles(new DirectoryFileFilter(true));
236: if (directories != null) {
237: for (int i = 0; i < directories.length; i++) {
238: deleteContent(directories[i]);
239: directories[i].delete();
240: }
241: }
242: }
243:
244: /**
245: * Notify that an event occured on a link.
246: */
247: public void crawled(LinkEvent event) {
248: if (event.getID() == LinkEvent.ERROR) {
249: // switch to stream page link
250:
251: if (!(event.getLink().getPage() instanceof StreamedPage)) {
252: logger.debug("loading file through streamed page.");
253: Link newLink = new Link(event.getLink().getURL());
254: newLink.setPage(new StreamedPage(event.getLink()));
255: addErrorLink(newLink);
256: } else {
257: logger.error("Link error "
258: + event.getLink().getURL().toExternalForm(),
259: event.getException());
260: }
261: } else if (event.getID() == LinkEvent.QUEUED) {
262: if (event.getLink().getPage() instanceof StreamedPage) {
263: event.getLink().setStatus(LinkEvent.DOWNLOADED);
264: }
265: }
266: }
267:
268: protected synchronized void addErrorLink(Link newLink) {
269: errorLinks.add(newLink);
270: }
271:
272: /**
273: * Implements the FileFilter. it accepts the switch of whether to accept files or directories
274: *
275: */
276: private class DirectoryFileFilter implements FileFilter {
277: private boolean directories = false;
278:
279: public DirectoryFileFilter(boolean directories) {
280: this .directories = directories;
281: }
282:
283: /**
284: * Tests whether or not the specified abstract pathname should be
285: * included in a pathname list.
286: *
287: * @param pathname The abstract pathname to be tested
288: * @return <code>true</code> if and only if <code>pathname</code>
289: * should be included
290: */
291: public boolean accept(File pathname) {
292: if (directories) {
293: return pathname.isDirectory();
294: } else {
295: return pathname.isFile();
296: }
297: }
298:
299: }
300:
301: }
|