Source Code Cross Referenced for FetchFTP.java in » Web-Crawler » heritrix » org » archive » crawler » fetcher » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Web Crawler » heritrix » org.archive.crawler.fetcher 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /* FetchFTP.java
002:         *
003:         * $Id: FetchFTP.java 5080 2007-04-13 20:30:49Z gojomo $
004:         *
005:         * Created on Jun 5, 2003
006:         *
007:         * Copyright (C) 2003 Internet Archive.
008:         *
009:         * This file is part of the Heritrix web crawler (crawler.archive.org).
010:         *
011:         * Heritrix is free software; you can redistribute it and/or modify
012:         * it under the terms of the GNU Lesser Public License as published by
013:         * the Free Software Foundation; either version 2.1 of the License, or
014:         * any later version.
015:         *
016:         * Heritrix is distributed in the hope that it will be useful,
017:         * but WITHOUT ANY WARRANTY; without even the implied warranty of
018:         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
019:         * GNU Lesser Public License for more details.
020:         *
021:         * You should have received a copy of the GNU Lesser Public License
022:         * along with Heritrix; if not, write to the Free Software
023:         * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
024:         */
025:        package org.archive.crawler.fetcher;
026:
027:        import java.io.IOException;
028:        import java.io.UnsupportedEncodingException;
029:        import java.net.Socket;
030:        import java.net.URLEncoder;
031:        import java.util.logging.Level;
032:        import java.util.logging.Logger;
033:        import java.util.regex.Matcher;
034:        import java.util.regex.Pattern;
035:
036:        import javax.management.AttributeNotFoundException;
037:
038:        import org.apache.commons.httpclient.URIException;
039:        import org.apache.commons.net.ftp.FTPCommand;
040:        import org.archive.crawler.datamodel.CrawlURI;
041:        import org.archive.crawler.datamodel.CoreAttributeConstants;
042:        import org.archive.crawler.datamodel.FetchStatusCodes;
043:        import org.archive.crawler.extractor.Link;
044:        import static org.archive.crawler.extractor.Link.NAVLINK_HOP;
045:        import static org.archive.crawler.extractor.Link.NAVLINK_MISC;
046:        import org.archive.crawler.framework.Processor;
047:        import org.archive.crawler.settings.SimpleType;
048:        import org.archive.io.RecordingInputStream;
049:        import org.archive.io.ReplayCharSequence;
050:        import org.archive.net.ClientFTP;
051:        import org.archive.net.FTPException;
052:        import org.archive.net.UURI;
053:        import org.archive.util.ArchiveUtils;
054:        import org.archive.util.HttpRecorder;
055:
056:        /**
057:         * Fetches documents and directory listings using FTP.  This class will also
058:         * try to extract FTP "links" from directory listings.  For this class to
059:         * archive a directory listing, the remote FTP server must support the NLIST
060:         * command.  Most modern FTP servers should.
061:         * 
062:         * @author pjack
063:         *
064:         */
065:        public class FetchFTP extends Processor implements 
066:                CoreAttributeConstants {
067:
068:            /** Serialization ID; robust against trivial API changes. */
069:            private static final long serialVersionUID = ArchiveUtils
070:                    .classnameBasedUID(FetchFTP.class, 1);
071:
072:            /** Logger for this class. */
073:            private static Logger logger = Logger.getLogger(FetchFTP.class
074:                    .getName());
075:
076:            /** Pattern for matching directory entries. */
077:            private static Pattern DIR = Pattern.compile("(.+)$",
078:                    Pattern.MULTILINE);
079:
080:            /** The name for the <code>username</code> attribute. */
081:            final public static String ATTR_USERNAME = "username";
082:
083:            /** The description for the <code>username</code> attribute. */
084:            final private static String DESC_USERNAME = "The username to send to "
085:                    + "FTP servers.  By convention, the default value of \"anonymous\" is "
086:                    + "used for publicly available FTP sites.";
087:
088:            /** The default value for the <code>username</code> attribute. */
089:            final private static String DEFAULT_USERNAME = "anonymous";
090:
091:            /** The name for the <code>password</code> attribute. */
092:            final public static String ATTR_PASSWORD = "password";
093:
094:            /** The description for the <code>password</code> attribute. */
095:            final private static String DESC_PASSWORD = "The password to send to "
096:                    + "FTP servers.  By convention, anonymous users send their email address "
097:                    + "in this field.";
098:
099:            /** The default value for the <code>password</code> attribute. */
100:            final private static String DEFAULT_PASSWORD = "";
101:
102:            /** The name for the <code>extract-from-dirs</code> attribute. */
103:            final private static String ATTR_EXTRACT = "extract-from-dirs";
104:
105:            /** The description for the <code>extract-from-dirs</code> attribute. */
106:            final private static String DESC_EXTRACT = "Set to true to extract "
107:                    + "further URIs from FTP directories.  Default is true.";
108:
109:            /** The default value for the <code>extract-from-dirs</code> attribute. */
110:            final private static boolean DEFAULT_EXTRACT = true;
111:
112:            /** The name for the <code>extract-parent</code> attribute. */
113:            final private static String ATTR_EXTRACT_PARENT = "extract_parent";
114:
115:            /** The description for the <code>extract-parent</code> attribute. */
116:            final private static String DESC_EXTRACT_PARENT = "Set to true to extract "
117:                    + "the parent URI from all FTP URIs.  Default is true.";
118:
119:            /** The default value for the <code>extract-parent</code> attribute. */
120:            final private static boolean DEFAULT_EXTRACT_PARENT = true;
121:
122:            /** The name for the <code>max-length-bytes</code> attribute. */
123:            final public static String ATTR_MAX_LENGTH = "max-length-bytes";
124:
125:            /** The description for the <code>max-length-bytes</code> attribute. */
126:            final private static String DESC_MAX_LENGTH = "Maximum length in bytes to fetch.\n"
127:                    + "Fetch is truncated at this length. A value of 0 means no limit.";
128:
129:            /** The default value for the <code>max-length-bytes</code> attribute. */
130:            final private static long DEFAULT_MAX_LENGTH = 0;
131:
132:            /** The name for the <code>fetch-bandwidth</code> attribute. */
133:            final public static String ATTR_BANDWIDTH = "fetch-bandwidth";
134:
135:            /** The description for the <code>fetch-bandwidth</code> attribute. */
136:            final private static String DESC_BANDWIDTH = "";
137:
138:            /** The default value for the <code>fetch-bandwidth</code> attribute. */
139:            final private static int DEFAULT_BANDWIDTH = 0;
140:
141:            /** The name for the <code>timeout-seconds</code> attribute. */
142:            final public static String ATTR_TIMEOUT = "timeout-seconds";
143:
144:            /** The description for the <code>timeout-seconds</code> attribute. */
145:            final private static String DESC_TIMEOUT = "If the fetch is not "
146:                    + "completed in this number of seconds, give up (and retry later).";
147:
148:            /** The default value for the <code>timeout-seconds</code> attribute. */
149:            final private static int DEFAULT_TIMEOUT = 1200;
150:
151:            /**
152:             * Constructs a new <code>FetchFTP</code>.
153:             * 
154:             * @param name  the name of this processor
155:             */
156:            public FetchFTP(String name) {
157:                super (name, "FTP Fetcher.");
158:                add(ATTR_USERNAME, DESC_USERNAME, DEFAULT_USERNAME);
159:                add(ATTR_PASSWORD, DESC_PASSWORD, DEFAULT_PASSWORD);
160:                add(ATTR_EXTRACT, DESC_EXTRACT, DEFAULT_EXTRACT);
161:                add(ATTR_EXTRACT_PARENT, DESC_EXTRACT_PARENT,
162:                        DEFAULT_EXTRACT_PARENT);
163:                add(ATTR_MAX_LENGTH, DESC_MAX_LENGTH, DEFAULT_MAX_LENGTH);
164:                add(ATTR_BANDWIDTH, DESC_BANDWIDTH, DEFAULT_BANDWIDTH);
165:                add(ATTR_TIMEOUT, DESC_TIMEOUT, DEFAULT_TIMEOUT);
166:            }
167:
168:            /**
169:             * Convenience method for adding an attribute.
170:             * 
171:             * @param name   The name of the attribute
172:             * @param desc   The description of the attribute
173:             * @param def    The default value for the attribute
174:             */
175:            private void add(String name, String desc, Object def) {
176:                SimpleType st = new SimpleType(name, desc, def);
177:                addElementToDefinition(st);
178:            }
179:
180:            /**
181:             * Convenience method for extracting an attribute.
182:             * If a value for the specified name cannot be found,
183:             * a warning is written to the log and the specified
184:             * default value is returned instead.
185:             * 
186:             * @param context  The context for the attribute fetch
187:             * @param name     The name of the attribute to fetch
188:             * @param def      The value to return if the attribute isn't found
189:             * @return         The value of that attribute
190:             */
191:            private Object get(Object context, String name, Object def) {
192:                try {
193:                    return getAttribute(context, name);
194:                } catch (AttributeNotFoundException e) {
195:                    logger.warning("Attribute not found (using default): "
196:                            + name);
197:                    return def;
198:                }
199:            }
200:
201:            /**
202:             * Processes the given URI.  If the given URI is not an FTP URI, then
203:             * this method does nothing.  Otherwise an attempt is made to connect
204:             * to the FTP server.
205:             * 
206:             * <p>If the connection is successful, an attempt will be made to CD to 
207:             * the path specified in the URI.  If the remote CD command succeeds, 
208:             * then it is assumed that the URI represents a directory.  If the
209:             * CD command fails, then it is assumed that the URI represents
210:             * a file.
211:             * 
212:             * <p>For directories, the directory listing will be fetched using
213:             * the FTP LIST command, and saved to the HttpRecorder.  If the
214:             * <code>extract.from.dirs</code> attribute is set to true, then
215:             * the files in the fetched list will be added to the curi as
216:             * extracted FTP links.  (It was easier to do that here, rather
217:             * than writing a separate FTPExtractor.)
218:             * 
219:             * <p>For files, the file will be fetched using the FTP RETR
220:             * command, and saved to the HttpRecorder.
221:             * 
222:             * <p>All file transfers (including directory listings) occur using
223:             * Binary mode transfer.  Also, the local passive transfer mode
224:             * is always used, to play well with firewalls.
225:             * 
226:             * @param curi  the curi to process
227:             * @throws InterruptedException  if the thread is interrupted during
228:             *   processing
229:             */
230:            public void innerProcess(CrawlURI curi) throws InterruptedException {
231:                if (!curi.getUURI().getScheme().equals("ftp")) {
232:                    return;
233:                }
234:
235:                curi.putLong(A_FETCH_BEGAN_TIME, System.currentTimeMillis());
236:                HttpRecorder recorder = HttpRecorder.getHttpRecorder();
237:                ClientFTP client = new ClientFTP();
238:
239:                try {
240:                    fetch(curi, client, recorder);
241:                } catch (FTPException e) {
242:                    logger.log(Level.SEVERE, "FTP server reported problem.", e);
243:                    curi.setFetchStatus(e.getReplyCode());
244:                } catch (IOException e) {
245:                    logger.log(Level.SEVERE, "IO Error during FTP fetch.", e);
246:                    curi.setFetchStatus(FetchStatusCodes.S_CONNECT_LOST);
247:                } finally {
248:                    disconnect(client);
249:                    curi.setContentSize(recorder.getRecordedInput().getSize());
250:                    curi.putLong(A_FETCH_COMPLETED_TIME, System
251:                            .currentTimeMillis());
252:                }
253:            }
254:
255:            /**
256:             * Fetches a document from an FTP server.
257:             * 
258:             * @param curi      the URI of the document to fetch
259:             * @param client    the FTPClient to use for the fetch
260:             * @param recorder  the recorder to preserve the document in
261:             * @throws IOException  if a network or protocol error occurs
262:             * @throws InterruptedException  if the thread is interrupted
263:             */
264:            private void fetch(CrawlURI curi, ClientFTP client,
265:                    HttpRecorder recorder) throws IOException,
266:                    InterruptedException {
267:                // Connect to the FTP server.
268:                UURI uuri = curi.getUURI();
269:                int port = uuri.getPort();
270:                if (port == -1) {
271:                    port = 21;
272:                }
273:                client.connectStrict(uuri.getHost(), port);
274:
275:                // Authenticate.
276:                String[] auth = getAuth(curi);
277:                client.loginStrict(auth[0], auth[1]);
278:
279:                // The given resource may or may not be a directory.
280:                // To figure out which is which, execute a CD command to
281:                // the UURI's path.  If CD works, it's a directory.
282:                boolean dir = client.changeWorkingDirectory(uuri.getPath());
283:                if (dir) {
284:                    curi.setContentType("text/plain");
285:                }
286:
287:                // TODO: A future version of this class could use the system string to
288:                // set up custom directory parsing if the FTP server doesn't support 
289:                // the nlist command.
290:                if (logger.isLoggable(Level.FINE)) {
291:                    String system = client.getSystemName();
292:                    logger.fine(system);
293:                }
294:
295:                // Get a data socket.  This will either be the result of a NLIST
296:                // command for a directory, or a RETR command for a file.
297:                int command = dir ? FTPCommand.NLST : FTPCommand.RETR;
298:                String path = dir ? "." : uuri.getPath();
299:                client.enterLocalPassiveMode();
300:                client.setBinary();
301:                Socket socket = client.openDataConnection(command, path);
302:                curi.setFetchStatus(client.getReplyCode());
303:
304:                // Save the streams in the CURI, where downstream processors
305:                // expect to find them.
306:                try {
307:                    saveToRecorder(curi, socket, recorder);
308:                } finally {
309:                    recorder.close();
310:                    close(socket);
311:                }
312:
313:                curi.setFetchStatus(200);
314:                if (dir) {
315:                    extract(curi, recorder);
316:                }
317:                addParent(curi);
318:            }
319:
320:            /**
321:             * Saves the given socket to the given recorder.
322:             * 
323:             * @param curi      the curi that owns the recorder
324:             * @param socket    the socket whose streams to save
325:             * @param recorder  the recorder to save them to
326:             * @throws IOException  if a network or file error occurs
327:             * @throws InterruptedException  if the thread is interrupted
328:             */
329:            private void saveToRecorder(CrawlURI curi, Socket socket,
330:                    HttpRecorder recorder) throws IOException,
331:                    InterruptedException {
332:                curi.setHttpRecorder(recorder);
333:                recorder.markContentBegin();
334:                recorder.inputWrap(socket.getInputStream());
335:                recorder.outputWrap(socket.getOutputStream());
336:
337:                // Read the remote file/dir listing in its entirety.
338:                long softMax = 0;
339:                long hardMax = getMaxLength(curi);
340:                long timeout = (long) getTimeout(curi) * 1000;
341:                int maxRate = getFetchBandwidth(curi);
342:                RecordingInputStream input = recorder.getRecordedInput();
343:                input.setLimits(hardMax, timeout, maxRate);
344:                input.readFullyOrUntil(softMax);
345:            }
346:
347:            /**
348:             * Extract FTP links in a directory listing.
349:             * The listing must already be saved to the given recorder.
350:             * 
351:             * @param curi      The curi to save extracted links to
352:             * @param recorder  The recorder containing the directory listing
353:             */
354:            private void extract(CrawlURI curi, HttpRecorder recorder) {
355:                if (!getExtractFromDirs(curi)) {
356:                    return;
357:                }
358:
359:                ReplayCharSequence seq = null;
360:                try {
361:                    seq = recorder.getReplayCharSequence();
362:                    extract(curi, seq);
363:                } catch (IOException e) {
364:                    logger.log(Level.SEVERE, "IO error during extraction.", e);
365:                } catch (RuntimeException e) {
366:                    logger.log(Level.SEVERE, "IO error during extraction.", e);
367:                } finally {
368:                    close(seq);
369:                }
370:            }
371:
372:            /**
373:             * Extracts FTP links in a directory listing.
374:             * 
375:             * @param curi  The curi to save extracted links to
376:             * @param dir   The directory listing to extract links from
377:             * @throws URIException  if an extracted link is invalid
378:             */
379:            private void extract(CrawlURI curi, ReplayCharSequence dir) {
380:                logger.log(Level.FINEST, "Extracting URIs from FTP directory.");
381:                Matcher matcher = DIR.matcher(dir);
382:                while (matcher.find()) {
383:                    String file = matcher.group(1);
384:                    addExtracted(curi, file);
385:                }
386:            }
387:
388:            /**
389:             * Adds an extracted filename to the curi.  A new URI will be formed
390:             * by taking the given curi (which should represent the directory the
391:             * file lives in) and appending the file.
392:             * 
393:             * @param curi  the curi to store the discovered link in
394:             * @param file  the filename of the discovered link
395:             */
396:            private void addExtracted(CrawlURI curi, String file) {
397:                try {
398:                    file = URLEncoder.encode(file, "UTF-8");
399:                } catch (UnsupportedEncodingException e) {
400:                    throw new AssertionError(e);
401:                }
402:                if (logger.isLoggable(Level.FINEST)) {
403:                    logger.log(Level.FINEST, "Found " + file);
404:                }
405:                String base = curi.toString();
406:                if (base.endsWith("/")) {
407:                    base = base.substring(0, base.length() - 1);
408:                }
409:                try {
410:                    UURI n = new UURI(base + "/" + file, true);
411:                    Link link = new Link(curi.getUURI(), n, NAVLINK_MISC,
412:                            NAVLINK_HOP);
413:                    curi.addOutLink(link);
414:                } catch (URIException e) {
415:                    logger
416:                            .log(Level.WARNING, "URI error during extraction.",
417:                                    e);
418:                }
419:            }
420:
421:            /**
422:             * Extracts the parent URI from the given curi, then adds that parent
423:             * URI as a discovered link to the curi. 
424:             * 
425:             * <p>If the <code>extract-parent</code> attribute is false, then this
426:             * method does nothing.  Also, if the path of the given curi is 
427:             * <code>/</code>, then this method does nothing.
428:             * 
429:             * <p>Otherwise the parent is determined by eliminated the lowest part
430:             * of the URI's path.  Eg, the parent of <code>ftp://foo.com/one/two</code>
431:             * is <code>ftp://foo.com/one</code>.
432:             * 
433:             * @param curi  the curi whose parent to add
434:             */
435:            private void addParent(CrawlURI curi) {
436:                if (!getExtractParent(curi)) {
437:                    return;
438:                }
439:                UURI uuri = curi.getUURI();
440:                try {
441:                    if (uuri.getPath().equals("/")) {
442:                        // There's no parent to add.
443:                        return;
444:                    }
445:                    String scheme = uuri.getScheme();
446:                    String auth = uuri.getEscapedAuthority();
447:                    String path = uuri.getEscapedCurrentHierPath();
448:                    UURI parent = new UURI(scheme + "://" + auth + path, false);
449:
450:                    Link link = new Link(uuri, parent, NAVLINK_MISC,
451:                            NAVLINK_HOP);
452:                    curi.addOutLink(link);
453:                } catch (URIException e) {
454:                    logger
455:                            .log(Level.WARNING, "URI error during extraction.",
456:                                    e);
457:                }
458:            }
459:
460:            /**
461:             * Returns the <code>extract.from.dirs</code> attribute for this
462:             * <code>FetchFTP</code> and the given curi.
463:             * 
464:             * @param curi  the curi whose attribute to return
465:             * @return  that curi's <code>extract.from.dirs</code>
466:             */
467:            public boolean getExtractFromDirs(CrawlURI curi) {
468:                return (Boolean) get(curi, ATTR_EXTRACT, DEFAULT_EXTRACT);
469:            }
470:
471:            /**
472:             * Returns the <code>extract.parent</code> attribute for this
473:             * <code>FetchFTP</code> and the given curi.
474:             * 
475:             * @param curi  the curi whose attribute to return
476:             * @return  that curi's <code>extract-parent</code>
477:             */
478:            public boolean getExtractParent(CrawlURI curi) {
479:                return (Boolean) get(curi, ATTR_EXTRACT_PARENT,
480:                        DEFAULT_EXTRACT_PARENT);
481:            }
482:
483:            /**
484:             * Returns the <code>timeout-seconds</code> attribute for this
485:             * <code>FetchFTP</code> and the given curi.
486:             * 
487:             * @param curi   the curi whose attribute to return
488:             * @return   that curi's <code>timeout-seconds</code>
489:             */
490:            public int getTimeout(CrawlURI curi) {
491:                return (Integer) get(curi, ATTR_TIMEOUT, DEFAULT_TIMEOUT);
492:            }
493:
494:            /**
495:             * Returns the <code>max-length-bytes</code> attribute for this
496:             * <code>FetchFTP</code> and the given curi.
497:             * 
498:             * @param curi  the curi whose attribute to return
499:             * @return  that curi's <code>max-length-bytes</code>
500:             */
501:            public long getMaxLength(CrawlURI curi) {
502:                return (Long) get(curi, ATTR_MAX_LENGTH, DEFAULT_MAX_LENGTH);
503:            }
504:
505:            /**
506:             * Returns the <code>fetch-bandwidth</code> attribute for this
507:             * <code>FetchFTP</code> and the given curi.
508:             * 
509:             * @param curi  the curi whose attribute to return
510:             * @return  that curi's <code>fetch-bandwidth</code>
511:             */
512:            public int getFetchBandwidth(CrawlURI curi) {
513:                return (Integer) get(curi, ATTR_BANDWIDTH, DEFAULT_BANDWIDTH);
514:            }
515:
516:            /**
517:             * Returns the username and password for the given URI.  This method
518:             * always returns an array of length 2.  The first element in the returned
519:             * array is the username for the URI, and the second element is the
520:             * password.
521:             * 
522:             * <p>If the URI itself contains the username and password (i.e., it looks
523:             * like <code>ftp://username:password@host/path</code>) then that username
524:             * and password are returned.
525:             * 
526:             * <p>Otherwise the settings system is probed for the <code>username</code>
527:             * and <code>password</code> attributes for this <code>FTPFetch</code>
528:             * and the given <code>curi</code> context.  The values of those 
529:             * attributes are then returned.
530:             * 
531:             * @param curi  the curi whose username and password to return
532:             * @return  an array containing the username and password
533:             */
534:            private String[] getAuth(CrawlURI curi) {
535:                String[] result = new String[2];
536:                UURI uuri = curi.getUURI();
537:                String userinfo;
538:                try {
539:                    userinfo = uuri.getUserinfo();
540:                } catch (URIException e) {
541:                    assert false;
542:                    logger.finest("getUserinfo raised URIException.");
543:                    userinfo = null;
544:                }
545:                if (userinfo != null) {
546:                    int p = userinfo.indexOf(':');
547:                    if (p > 0) {
548:                        result[0] = userinfo.substring(0, p);
549:                        result[1] = userinfo.substring(p + 1);
550:                        return result;
551:                    }
552:                }
553:                result[0] = (String) get(curi, ATTR_USERNAME, DEFAULT_USERNAME);
554:                result[1] = (String) get(curi, ATTR_PASSWORD, DEFAULT_PASSWORD);
555:                return result;
556:            }
557:
558:            /**
559:             * Determines the password for the given URI.  If the URI itself contains
560:             * a password, then that password is returned.  Otherwise the settings
561:             * system is probed for the <code>password</code> attribute, and the value
562:             * for that attribute is returned.
563:             * 
564:             * @param curi  the curi whose password to return
565:             * @return  that password
566:             */
567:            public String determinePassword(CrawlURI curi) {
568:                return (String) get(curi, ATTR_PASSWORD, DEFAULT_PASSWORD);
569:            }
570:
571:            /**
572:             * Quietly closes the given socket.
573:             * 
574:             * @param socket  the socket to close
575:             */
576:            private static void close(Socket socket) {
577:                try {
578:                    socket.close();
579:                } catch (IOException e) {
580:                    logger.log(Level.WARNING, "IO error closing socket.", e);
581:                }
582:            }
583:
584:            /**
585:             * Quietly closes the given sequence.
586:             * If an IOException is raised, this method logs it as a warning.
587:             * 
588:             * @param seq  the sequence to close
589:             */
590:            private static void close(ReplayCharSequence seq) {
591:                if (seq == null) {
592:                    return;
593:                }
594:                try {
595:                    seq.close();
596:                } catch (IOException e) {
597:                    logger.log(Level.WARNING,
598:                            "IO error closing ReplayCharSequence.", e);
599:                }
600:            }
601:
602:            /**
603:             * Quietly disconnects from the given FTP client.
604:             * If an IOException is raised, this method logs it as a warning.
605:             * 
606:             * @param client  the client to disconnect
607:             */
608:            private static void disconnect(ClientFTP client) {
609:                if (client.isConnected())
610:                    try {
611:                        client.disconnect();
612:                    } catch (IOException e) {
613:                        if (logger.isLoggable(Level.WARNING)) {
614:                            logger
615:                                    .warning("Could not disconnect from FTP client: "
616:                                            + e.getMessage());
617:                        }
618:                    }
619:            }
620:
621:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.