Source Code Cross Referenced for HttpDocCache.java in » Web-Crawler » JoBo » net » matuschek » http » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Web Crawler » JoBo » net.matuschek.http 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        //////////////////////////////////////////////////////////////////////////////
002:        // Copyright (c) Insiders Wissensbasierte Systeme GmbH, Germany
003:        //////////////////////////////////////////////////////////////////////////////
004:
005:        package net.matuschek.http;
006:
007:        import java.io.*;
008:        import java.net.*;
009:        import java.util.*;
010:        import java.util.zip.ZipEntry;
011:        import java.util.zip.ZipFile;
012:        import java.util.zip.ZipOutputStream;
013:
014:        import net.matuschek.util.MD5;
015:        import org.apache.log4j.Category;
016:
017:        /**
018:         * Full implementation of HttpDocManager interface.
019:         * Caches documents, links and headers in ZIP-files.
020:         * Documents with same content will be detected 
021:         * and share the same content-storage.
022:         *
023:         * @author Oliver Schmidt
024:         * @version $Revision: 1.2 $
025:         */
026:        public class HttpDocCache implements  HttpDocManager {
027:
                /**
                 * Internally used header name to mark duplicates. Headers with this
                 * name are left out when headers are written to the document ZIP, and
                 * documents carrying it are not added to the collected URLs.
                 */
                protected final static String CONTENT_DUPLICATE = "Content-Duplicate";

                /**
                 * Use MD5 encoding for filenames. When false, the URI itself is used
                 * as the filename with reserved characters escaped. The flag is also
                 * read while the storage directory is set up in the constructor, where
                 * it decides whether "directory.csv" is opened.
                 */
                public boolean useMD5 = true;

                /** log4j logging instance */
                protected static Category log = Category
                        .getInstance(HttpDocCache.class.getName());

                /** URLs collected by processDocument; listed by toString() */
                private Collection urls = new LinkedList();

                /** storage main directory; always ends with File.separator once set */
                protected String storagedir;

                /** "directory.csv" file that holds directory information (only set when useMD5 is true) */
                protected File storageDirectoryFile = null;

                /** subdirectory name for links */
                protected final static String LINKS = "links" + File.separator;

                /** subdirectory name for content (content ZIPs and their URL lists) */
                protected final static String CONTENT = "content" + File.separator;

                /** subdirectory name for document information (headers, URL, links) */
                protected final static String DOCUMENTS = "documents"
                        + File.separator;
056:
                /**
                 * Creates a cache rooted at the given directory. The directory layout
                 * is set up immediately by delegating to setStorageDir.
                 *
                 * @param storageDirectory path of the main storage directory; a trailing
                 *        file separator is appended if it is missing
                 */
                public HttpDocCache(String storageDirectory) {
                    setStorageDir(storageDirectory);
                }
064:
065:            private FileOutputStream storageDirectoryStream = null;
066:
067:            /**
068:             * Set storage directory and create directories if necessary.
069:             * @param newStoragedir
070:             */
071:            private void setStorageDir(String newStoragedir) {
072:                storagedir = newStoragedir;
073:
074:                if (!storagedir.endsWith(File.separator)) {
075:                    storagedir = storagedir + File.separator;
076:                }
077:
078:                // create the directories, if they do not exist yet.
079:                File storagedirFile = new File(storagedir + DOCUMENTS);
080:                if (!storagedirFile.exists()) {
081:                    storagedirFile.mkdirs();
082:                }
083:                File contentFile = new File(storagedir + CONTENT);
084:                if (!contentFile.exists()) {
085:                    contentFile.mkdirs();
086:                }
087:
088:                if (useMD5) {
089:                    storageDirectoryFile = new File(storagedir
090:                            + "directory.csv");
091:                    try {
092:                        storageDirectoryStream = new FileOutputStream(
093:                                storageDirectoryFile.getPath(), true);
094:                        if (!storageDirectoryFile.exists()) {
095:                            storageDirectoryStream.write(("Path,URL" + LF)
096:                                    .getBytes());
097:                        }
098:                    } catch (Exception e) {
099:                        log.error(e.getMessage());
100:                    }
101:                }
102:            }
103:
                /** double quote used to wrap the fields written to "directory.csv" */
                final static String QUOTE = "\"";
                /** platform line separator, used to terminate the text lines this class writes */
                final static String LF = System.getProperty("line.separator");
106:
107:            /**
108:             * Method store.
109:             * stores the document to the storage directory
110:             * @param doc the document to be stored
111:             * @param links to be stored (optional)
112:             * @return String
113:             * @throws DocManagerException if the document cannot be written to the directory
114:             */
115:            public void storeDocument(HttpDoc doc) throws DocManagerException {
116:                List links = doc.getLinks();
117:
118:                // donīt store cached documents
119:                if (doc.isCached()) {
120:                    return;
121:                }
122:
123:                // get the content type
124:                String filename = generateFilename(doc.getURL()
125:                        .toExternalForm());
126:
127:                String filepath = storagedir + DOCUMENTS + filename;
128:                checkStoragePathFor(DOCUMENTS, filename);
129:
130:                try {
131:                    File f = new File(filepath + ".zip");
132:                    if (!f.exists()) {
133:                        writeDirectoryInfo(doc, filename);
134:                    }
135:
136:                    // write it to the file
137:                    OutputStream fs = new BufferedOutputStream(
138:                            new FileOutputStream(f));
139:                    ZipOutputStream zos = new ZipOutputStream(fs);
140:                    zos.setLevel(9);
141:
142:                    try {
143:                        //			writeContentToZipFile(doc, zos);
144:                        storeContent(doc);
145:                        writeHeadersToZipFile(doc, zos);
146:                        writeUrlToZipFile(doc, zos);
147:                        if (links != null) {
148:                            writeLinksToZipFile(links, zos);
149:                        }
150:                    } catch (Throwable e) {
151:                        System.out.println(e);
152:                    } finally {
153:                        zos.close();
154:                        fs.close();
155:                        long date = doc.getDateAsMilliSeconds();
156:                        f.setLastModified(date > 0 ? date : System
157:                                .currentTimeMillis());
158:                    }
159:                } catch (IOException ioex) {
160:                    DocManagerException ex = new DocManagerException(ioex
161:                            .getMessage());
162:                    throw ex;
163:                }
164:            }
165:
166:            /**
167:             * Write Directory info.
168:             * @param doc
169:             * @param filename in cache
170:             * @throws IOException
171:             */
172:            protected void writeDirectoryInfo(HttpDoc doc, String filename)
173:                    throws IOException {
174:                if (storageDirectoryFile != null) {
175:                    synchronized (storageDirectoryFile) {
176:                        try {
177:                            String directoryInfo = QUOTE + filename + QUOTE
178:                                    + "," + QUOTE + doc.getURL() + QUOTE + LF;
179:                            storageDirectoryStream.write(directoryInfo
180:                                    .getBytes());
181:                        } catch (Exception e) {
182:                            log.warn(e.getMessage());
183:                            storageDirectoryStream.close();
184:                        }
185:                    }
186:                }
187:            }
188:
189:            /**
190:             * Write content to zipFile
191:             * @param doc
192:             * @param zos
193:             * @throws IOException
194:             */
195:            protected void writeContentToZipFile(HttpDoc doc,
196:                    ZipOutputStream zos) throws IOException {
197:                String contenttype = doc
198:                        .getHeaderValue(HttpHeader.CONTENT_TYPE);
199:                String extension = getExtensionFromContenttype(contenttype);
200:                ZipEntry zipEntry = new ZipEntry("content" + extension);
201:                long date = doc.getLastModifiedAsMilliSeconds();
202:                if (date < 0) {
203:                    date = doc.getDateAsMilliSeconds();
204:                }
205:                zipEntry.setTime(date);
206:                zos.putNextEntry(zipEntry);
207:                zos.write(doc.getContent());
208:                zos.closeEntry();
209:            }
210:
211:            /**
212:             * Write headers to zipFile.
213:             * @param doc
214:             * @param zos
215:             * @return ZipEntry
216:             * @throws IOException
217:             */
218:            protected ZipEntry writeHeadersToZipFile(HttpDoc doc,
219:                    ZipOutputStream zos) throws IOException {
220:                StringBuffer comment = new StringBuffer();
221:                Vector headers = doc.getHttpHeader();
222:                for (Iterator iter = headers.iterator(); iter.hasNext();) {
223:                    HttpHeader header = (HttpHeader) iter.next();
224:                    if (!header.getName().equals(CONTENT_DUPLICATE)) {
225:                        comment.append(header.toString());
226:                        if (iter.hasNext()) {
227:                            comment.append(LF);
228:                        }
229:                    }
230:                }
231:                ZipEntry ze = new ZipEntry("header");
232:                zos.putNextEntry(ze);
233:                zos.write(comment.toString().getBytes());
234:                long date = doc.getDateAsMilliSeconds();
235:                ze.setTime(date > 0 ? date : System.currentTimeMillis());
236:                zos.closeEntry();
237:                return ze;
238:            }
239:
240:            /**
241:             * Read headers from ZipFile
242:             * @param doc
243:             * @param zf
244:             * @return boolean
245:             * @throws IOException
246:             */
247:            protected boolean readHeadersFromZipFile(HttpDoc doc, ZipFile zf)
248:                    throws IOException {
249:                ZipEntry ze = zf.getEntry("header");
250:                if (ze != null) {
251:                    InputStream is = zf.getInputStream(ze);
252:                    BufferedReader reader = new BufferedReader(
253:                            new InputStreamReader(is));
254:                    while (reader.ready()) {
255:                        String line = reader.readLine();
256:                        int pos = line.indexOf(": ");
257:                        if (pos >= 0) {
258:                            String name = line.substring(0, pos);
259:                            String value = line.substring(pos + 2);
260:                            HttpHeader header = new HttpHeader(name, value);
261:                            doc.addHeader(header);
262:                        }
263:                    }
264:                    reader.close();
265:                    return true;
266:                }
267:                return false;
268:            }
269:
270:            /**
271:             * Read links from ZipFile
272:             * @param doc
273:             * @param zf
274:             * @return boolean
275:             * @throws IOException
276:             */
277:            protected boolean readLinksFromZipFile(HttpDoc doc, ZipFile zf)
278:                    throws IOException {
279:                ZipEntry ze = zf.getEntry("links");
280:                List links = doc.getLinks();
281:                if (links == null) {
282:                    links = new Vector();
283:                    doc.setLinks(links);
284:                } else {
285:                    links.clear();
286:                }
287:
288:                if (ze != null) {
289:                    InputStream is = zf.getInputStream(ze);
290:                    BufferedReader reader = new BufferedReader(
291:                            new InputStreamReader(is));
292:                    while (reader.ready()) {
293:                        String line = reader.readLine();
294:                        if (line != null) {
295:                            URL url = new URL(line);
296:                            links.add(url);
297:                        }
298:                    }
299:                    reader.close();
300:                    return true;
301:                }
302:                return false;
303:            }
304:
305:            /**
306:             * Write Url to ZipFile.
307:             * @param doc
308:             * @param zos
309:             * @return ZipEntry
310:             * @throws IOException
311:             */
312:            protected ZipEntry writeUrlToZipFile(HttpDoc doc,
313:                    ZipOutputStream zos) throws IOException {
314:                String url = doc.getURL().toString();
315:                ZipEntry ze = new ZipEntry("url");
316:                zos.putNextEntry(ze);
317:                zos.write(url.getBytes());
318:                long date = doc.getDateAsMilliSeconds();
319:                ze.setTime(date > 0 ? date : System.currentTimeMillis());
320:                zos.closeEntry();
321:                return ze;
322:            }
323:
324:            /**
325:             * Get File of document content users.
326:             * @param doc
327:             * @return File
328:             */
329:            private File getContentUsersFile(HttpDoc doc) {
330:                File f = null;
331:                byte[] content = doc.getContent();
332:                if (content.length != 0) {
333:                    String md5 = doc.getContentMD5();
334:                    f = contentFile(md5, ".txt");
335:                }
336:                return f;
337:            }
338:
339:            /**
340:             * Returns URL-String of duplicate content (if found).
341:             * @see net.matuschek.http.HttpDocManager#findDuplicate(HttpDoc)
342:             */
343:            public String findDuplicate(HttpDoc doc) throws IOException {
344:                String duplicate = null;
345:                File f = getContentUsersFile(doc);
346:                if (f != null) {
347:                    String urlString = doc.getURL().toString();
348:                    if (f.exists()) {
349:                        BufferedReader reader = new BufferedReader(
350:                                new InputStreamReader(new FileInputStream(f)));
351:                        while (reader.ready()) {
352:                            String line = reader.readLine();
353:                            if (line.equals(urlString)) {
354:                                break;
355:                            } else if (duplicate == null) {
356:                                duplicate = line;
357:                            }
358:                        }
359:                        reader.close();
360:                    }
361:                }
362:                return duplicate;
363:            }
364:
365:            /**
366:             * Creates a file with a name created by the content, containing the URL.
367:             * @param doc
368:             */
369:            protected void storeContent(HttpDoc doc) throws IOException {
370:                if (doc.getContent().length == 0)
371:                    return;
372:                File f = getContentUsersFile(doc);
373:                String urlString = doc.getURL().toString();
374:                String md5 = doc.getContentMD5();
375:
376:                // is content user?
377:                boolean found = false;
378:                if (f.exists()) {
379:                    BufferedReader reader = new BufferedReader(
380:                            new InputStreamReader(new FileInputStream(f)));
381:                    try {
382:                        while (reader.ready()) {
383:                            String line = reader.readLine();
384:                            if (line.equals(urlString)) {
385:                                found = true;
386:                                break;
387:                            }
388:                        }
389:                    } finally {
390:                        reader.close();
391:                    }
392:                }
393:
394:                // write content
395:                File fzip = contentFile(md5, ".zip");
396:                if (!fzip.exists()) {
397:                    checkStoragePathFor(CONTENT,
398:                            useFirstCharactersAsDirectories(md5));
399:                    OutputStream fs = new BufferedOutputStream(
400:                            new FileOutputStream(fzip));
401:                    ZipOutputStream zos = null;
402:                    try {
403:                        zos = new ZipOutputStream(fs);
404:                        zos.setLevel(9);
405:                        writeContentToZipFile(doc, zos);
406:                    } finally {
407:                        if (zos != null) {
408:                            zos.close();
409:                        } else {
410:                            fs.close();
411:                        }
412:                    }
413:                } else {
414:                    fzip.setLastModified(System.currentTimeMillis());
415:                }
416:
417:                // append user
418:                if (!found) {
419:                    FileOutputStream os = new FileOutputStream(f.getPath(),
420:                            true);
421:                    try {
422:                        os.write((urlString + LF).getBytes());
423:                    } finally {
424:                        os.close();
425:                    }
426:                }
427:            }
428:
429:            /**
430:             * Write links to ZipFile.
431:             * @param links
432:             * @param ZipOutputStream
433:             */
434:            protected void writeLinksToZipFile(List links, ZipOutputStream zs)
435:                    throws IOException {
436:                HashSet storedLinks = new HashSet();
437:                ZipEntry zipEntry = new ZipEntry("links");
438:                zs.putNextEntry(zipEntry);
439:                for (Iterator iter = links.iterator(); iter.hasNext();) {
440:                    URL url = (URL) iter.next();
441:                    if (!storedLinks.contains(url)) {
442:                        zs.write((url.toString() + LF).getBytes());
443:                        storedLinks.add(url);
444:                    }
445:                }
446:                zs.closeEntry();
447:            }
448:
449:            /**
450:             * Collects Urls (duplicates will be skipped).
451:             * 
452:             * @param doc a HttpDoc object to process. This may also be null
453:             * @exception DocManagerException will be thrown if an error occurs
454:             * while processing the document.
455:             * @see net.matuschek.http.HttpDocManager#processDocument(net.matuschek.http.HttpDoc)
456:             */
457:            public void processDocument(HttpDoc doc) throws DocManagerException {
458:                log.info("Processing " + doc.getURL().toExternalForm()
459:                        + doc.getHttpHeader());
460:
461:                // collect URL (only if content is no duplicate)
462:                HttpHeader duplicate = doc.getHeader(CONTENT_DUPLICATE);
463:                if (duplicate == null) {
464:                    urls.add(doc.getURL());
465:                }
466:            }
467:
468:            /**
469:             * retrieves a document from the cache.
470:             * @param url
471:             * @see net.matuschek.http.HttpDocManager#retrieveFromCache(java.net.URL)
472:             */
473:            public HttpDoc retrieveFromCache(java.net.URL url) {
474:                HttpDoc doc = null;
475:                File f = null;
476:                try {
477:                    String filename0 = url.toExternalForm();
478:                    String filename = generateFilename(filename0) + ".zip";
479:                    f = new File(storagedir + DOCUMENTS + filename);
480:
481:                    if (f.exists()) {
482:                        log.info("retrieve " + f);
483:
484:                        // create document and read it from file
485:                        doc = new HttpDoc();
486:                        doc.setURL(url);
487:                        ZipFile zf = new ZipFile(f);
488:
489:                        // read headers
490:                        readHeadersFromZipFile(doc, zf);
491:
492:                        // read links
493:                        readLinksFromZipFile(doc, zf);
494:
495:                        doc.setCached(true);
496:
497:                        // read content
498:                        String md5 = doc.getContentMD5();
499:                        File contentFile = contentFile(md5, ".zip");
500:                        if (contentFile.exists()) {
501:                            ZipFile contentZip = new ZipFile(contentFile);
502:                            readContentFromZipFile(doc, contentZip);
503:                            contentZip.close();
504:                        } else {
505:                            doc.setContent(new byte[0]);
506:                        }
507:                        zf.close();
508:                    }
509:                } catch (Exception e) {
510:                    log.warn("removing invalid file " + f);
511:                    f.delete();
512:                    doc = null;
513:                }
514:
515:                return doc;
516:            }
517:
518:            /**
519:             * Read content from ZipFile
520:             * @param doc
521:             * @param contentZip
522:             * @throws IOException
523:             */
524:            protected void readContentFromZipFile(HttpDoc doc,
525:                    ZipFile contentZip) throws IOException {
526:                byte[] content = null;
527:                for (Enumeration enumeration = contentZip.entries(); enumeration
528:                        .hasMoreElements();) {
529:                    ZipEntry zipEntry = (ZipEntry) enumeration.nextElement();
530:                    if (zipEntry.getName().startsWith("content")) {
531:                        InputStream is = contentZip.getInputStream(zipEntry);
532:                        int length = (int) zipEntry.getSize();
533:                        content = new byte[length];
534:                        int startPos = 0;
535:                        while (startPos < length) {
536:                            startPos += is.read(content, startPos, length
537:                                    - startPos);
538:                        }
539:                        is.close();
540:                        break;
541:                    }
542:                }
543:                doc.setContent(content);
544:            }
545:
546:            /**
547:             * Remove document from cache.
548:             * @param url
549:             * @see net.matuschek.http.HttpDocManager#removeDocument(URL)
550:             */
551:            public void removeDocument(URL url) {
552:                HttpDoc doc = retrieveFromCache(url);
553:
554:                File f = null;
555:                try {
556:                    String filename0 = url.toExternalForm();
557:                    String filename = generateFilename(filename0) + ".zip";
558:
559:                    f = new File(storagedir + LINKS + filename);
560:                    if (f.exists()) {
561:                        f.delete();
562:                    }
563:
564:                    deleteContent(doc);
565:                    f = new File(storagedir + DOCUMENTS + filename);
566:                    if (f.exists()) {
567:                        f.delete();
568:                    }
569:                } catch (Exception ex) {
570:                    log.error(ex);
571:                }
572:            }
573:
574:            /**
575:             * Deletes stored content for the given document
576:             * @param document
577:             */
578:            private void deleteContent(HttpDoc doc) throws IOException {
579:                byte[] content = doc.getContent();
580:                if (content.length == 0) {
581:                    return;
582:                }
583:                String urlString = doc.getURL().toString();
584:                String md5 = doc.getContentMD5();
585:                File f = contentFile(md5, ".txt");
586:                ArrayList entries = new ArrayList();
587:                if (f.exists()) {
588:                    BufferedReader reader = new BufferedReader(
589:                            new InputStreamReader(new FileInputStream(f)));
590:                    while (reader.ready()) {
591:                        String line = reader.readLine();
592:                        if (!line.equals(urlString)) {
593:                            entries.add(line);
594:                        }
595:                    }
596:                    reader.close();
597:                }
598:                if (entries.size() > 0) {
599:                    FileOutputStream os = new FileOutputStream(f.getPath(),
600:                            false);
601:                    for (Iterator iter = entries.iterator(); iter.hasNext();) {
602:                        String line = (String) iter.next();
603:                        os.write((line + LF).getBytes());
604:                    }
605:                    os.close();
606:                } else {
607:                    f.delete();
608:                    File fzip = contentFile(md5, ".zip");
609:                    if (fzip.exists()) {
610:                        fzip.delete();
611:                    }
612:                }
613:            }
614:
615:            /**
616:             * List collected URLs.
617:             * @see java.lang.Object#toString()
618:             */
619:            public String toString() {
620:                StringBuffer sb = new StringBuffer(1000);
621:                for (Iterator i = urls.iterator(); i.hasNext();) {
622:                    sb.append(i.next()).append("\n");
623:                }
624:                return sb.toString();
625:            }
626:
627:            /**
628:             * Uses the first storageDirDepth characters of filename as paths
629:             * @param filename
630:             */
631:            private final String useFirstCharactersAsDirectories(String filename) {
632:                int n = storageDirDepth;
633:                if (n > filename.length())
634:                    n = filename.length();
635:                char dir[] = new char[n * 2];
636:                for (int i = 0; i < n; i++) {
637:                    dir[i * 2] = filename.charAt(i);
638:                    dir[i * 2 + 1] = File.separatorChar;
639:                }
640:                return new String(dir);
641:            }
642:
643:            /**
644:             * Checks if the storage path for the given file exists and creates it if necessary.
645:             * @param subdirectory
646:             * @param filename
647:             */
648:            private final void checkStoragePathFor(String subdirectory,
649:                    String filename) {
650:                if (!subdirectory.endsWith(File.separator)) {
651:                    subdirectory += File.separator;
652:                }
653:                String head = filename.substring(0, storageDirDepth * 2);
654:                File path = new File(storagedir + subdirectory + head);
655:                if (!path.exists()) {
656:                    path.mkdirs();
657:                }
658:            }
659:
660:            /**
661:             * Generate a valid filename for the given docURI.
662:             * @param docURI
663:             * @return String
664:             */
665:            protected String generateFilename(String docURI) {
666:                if (useMD5) {
667:                    MD5 md5 = new MD5(docURI);
668:                    String hex = md5.asHex();
669:                    if (storageDirDepth > 0) {
670:                        return useFirstCharactersAsDirectories(hex)
671:                                + hex.substring(storageDirDepth);
672:                    }
673:                    return hex;
674:                } else {
675:                    StringBuffer buf = new StringBuffer(docURI.length());
676:
677:                    for (int i = 0; i < docURI.length(); i++) {
678:                        char c = docURI.charAt(i);
679:                        switch (c) {
680:                        case '/':
681:                            buf.append("&slash;");
682:                            break;
683:                        case '\\':
684:                            buf.append("&backslash");
685:                            break;
686:                        case ':':
687:                            buf.append("&colon;");
688:                            break;
689:                        case '*':
690:                            buf.append("&asterisk;");
691:                            break;
692:                        case '?':
693:                            buf.append("&question;");
694:                            break;
695:                        case '\"':
696:                            buf.append("&quot;");
697:                            break;
698:                        case '<':
699:                            buf.append("&lt;");
700:                            break;
701:                        case '>':
702:                            buf.append("&gt;");
703:                            break;
704:                        case '|':
705:                            buf.append("&or;");
706:                            break;
707:                        default:
708:                            buf.append(c);
709:                            break;
710:                        }
711:                    }
712:                    docURI = buf.toString();
713:
714:                    return docURI;
715:                }
716:            }
717:
718:            /**
719:             * Returns a File with the mapping of this content to its URLs.
720:             * @param content
721:             * @return long
722:             */
723:            protected File contentFile(String hex, String extension) {
724:                return new File(storagedir + CONTENT
725:                        + useFirstCharactersAsDirectories(hex)
726:                        + hex.substring(storageDirDepth) + extension);
727:            }
728:
729:            /**
730:             * Close storageDirectory File.
731:             * @see net.matuschek.http.HttpDocManager#finish()
732:             */
733:            public void finish() {
734:                if (storageDirectoryStream != null) {
735:                    try {
736:                        storageDirectoryStream.close();
737:                        storageDirectoryStream = null;
738:                    } catch (IOException e) {
739:                        e.printStackTrace();
740:                    }
741:                }
742:            }
743:
744:            /**
745:             * Calls finish and super.finalize().
746:             * @see java.lang.Object#finalize()
747:             */
748:            protected void finalize() throws Throwable {
749:                finish();
750:                super .finalize();
751:            }
752:
            /**
             * Depth of the directory tree used for stored files
             * (depth = number of used subdirectory levels).
             * The first storageDirDepth characters of a file name will be used
             * as directories. With the initial value 0, generateFilename()
             * does not add a directory part to the MD5 based names.
             */
            protected int storageDirDepth = 0;
760:
761:            /**
762:             * Sets the desired directory depth of the source set directory
763:             * (depth = number of used subdirectory levels)
764:             * 
765:             * @param desired depth of source set directory.
766:             */
767:            public void setStorageDirDepth(int depth) {
768:                storageDirDepth = depth;
769:            }
770:
771:            /**
772:             * Method getstorageDirDepth.
773:             * returns the directory depth of the source set directory
774:             * @param desired depth of source set directory.
775:             * @return the directory depth of the source set directory
776:             */
777:            public int getStorageDirDepth() {
778:                return storageDirDepth;
779:            }
780:
781:            /**
782:             * Get relevant part of contenttype and get default extension for it.
783:             * @param contenttype
784:             * @return extension
785:             */
786:            private String getExtensionFromContenttype(String contenttype) {
787:                String extension = null;
788:                if (contenttype != null) {
789:                    String strContentType = null;
790:                    int pos = contenttype.indexOf(';');
791:                    if (pos > 0) {
792:                        strContentType = contenttype.substring(0, pos).trim();
793:                    } else {
794:                        strContentType = contenttype.trim();
795:                    }
796:                    extension = getDefaultExtension(strContentType);
797:                }
798:
799:                if (extension == null) {
800:                    extension = "";
801:                } else {
802:                    extension = "." + extension;
803:                }
804:                return extension;
805:            }
806:
807:            /**
808:             * Get default extension for given contentType.
809:             * @param contentType
810:             * @return default extension or null
811:             */
812:            protected String getDefaultExtension(String contentType) {
813:                if (contentType == null) {
814:                    return null;
815:                } else if (contentType.indexOf("text/html") >= 0) {
816:                    return ".html";
817:                } else if (contentType.indexOf("text/") >= 0) {
818:                    return ".txt";
819:                } else {
820:                    return null;
821:                }
822:            }
823:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.