MiniCrawler: A skeletal Web crawler. : Mini Crawler « Network « C# / CSharp Tutorial

Home
C# / CSharp Tutorial
1.Language Basics
2.Data Type
3.Operator
4.Statement
5.String
6.struct
7.Class
8.Operator Overload
9.delegate
10.Attribute
11.Data Structure
12.Assembly
13.Date Time
14.Development
15.File Directory Stream
16.Preprocessing Directives
17.Regular Expression
18.Generic
19.Reflection
20.Thread
21.I18N Internationalization
22.LINQ
23.GUI Windows Forms
24.Windows Presentation Foundation
25.Windows Communication Foundation
26.Workflow
27.2D
28.Design Patterns
29.Windows
30.XML
31.XML LINQ
32.ADO.Net
33.Network
34.Directory Services
35.Security
36.unsafe
C# / C Sharp
C# / C Sharp by API
C# / CSharp Open Source
C# / CSharp Tutorial » Network » Mini Crawler 
33.29.1.MiniCrawler: A skeletal Web crawler.
//Code revised from
//C# 2.0 The Complete Reference
// 
using System; 
using System.Net; 
using System.IO; 
 
/// <summary>
/// A skeletal, interactive Web crawler. Starting from the URI given on the
/// command line it downloads the page, shows each absolute http/https link
/// it finds, and lets the user follow it (L), look for another (M), or quit (Q).
/// </summary>
class MiniCrawler {

  /// <summary>
  /// Finds the next link of the form <c>href="http..."</c> in a page.
  /// </summary>
  /// <param name="htmlstr">The text to search.</param>
  /// <param name="startloc">
  /// Index at which to begin searching. When a link is found it is updated to
  /// the index of the link's closing quote, so that a subsequent call resumes
  /// after this link. It is left unchanged when no link is found.
  /// </param>
  /// <returns>
  /// The link text between the quotes, or <c>null</c> if no further complete
  /// link exists at or after <paramref name="startloc"/>.
  /// </returns>
  static string FindLink(string htmlstr, ref int startloc) {
    // Nothing to search: report "no link" instead of letting IndexOf throw.
    if(htmlstr == null || startloc < 0 || startloc >= htmlstr.Length) {
      return null;
    }

    // Search the original text with an ordinal, case-insensitive comparison.
    // This avoids building a lower-cased copy of the whole page on every call
    // and keeps the match independent of the current culture.
    int i = htmlstr.IndexOf("href=\"http", startloc,
                            StringComparison.OrdinalIgnoreCase);
    if(i == -1) {
      return null;
    }

    // The match itself contains the opening quote, so this lookup always succeeds.
    int start = htmlstr.IndexOf('"', i) + 1;
    int end = htmlstr.IndexOf('"', start);
    if(end == -1) {
      // Unterminated attribute value (truncated or malformed page):
      // there is no complete link to return.
      return null;
    }

    startloc = end;
    return htmlstr.Substring(start, end - start);
  }

  /// <summary>
  /// Downloads the resource at <paramref name="uri"/> and returns its body as text.
  /// </summary>
  /// <param name="uri">The URI to request.</param>
  /// <returns>The entire response body.</returns>
  /// <remarks>
  /// The response, its stream and the reader are all released before this
  /// method returns, including when an exception is thrown part-way through.
  /// Exceptions from the request are not caught here; Main handles them.
  /// </remarks>
  static string ReadPage(string uri) {
    // Use the base WebRequest/WebResponse types: nothing here needs the
    // HTTP-specific members, and a cast to HttpWebRequest would throw
    // InvalidCastException for other registered schemes.
    WebRequest req = WebRequest.Create(uri);

    using(WebResponse resp = req.GetResponse())
    using(Stream istrm = resp.GetResponseStream())
    using(StreamReader rdr = new StreamReader(istrm)) {
      return rdr.ReadToEnd();
    }
  }

  /// <summary>
  /// Program entry point. Expects exactly one argument: the URI to start from.
  /// </summary>
  /// <param name="args">Command-line arguments.</param>
  public static void Main(string[] args) {
    if(args.Length != 1) {
      Console.WriteLine("Usage: MiniCrawler <uri>");
      return;
    }

    string uristr = args[0]; // holds current URI

    try {

      do {
        Console.WriteLine("Linking to " + uristr);

        // Read in the entire page; the connection is closed before prompting.
        string str = ReadPage(uristr);

        uristr = null; // disallow further use of this URI

        int curloc = 0; // holds current location in response

        while(true) {
          // Find the next URI to link to.
          string link = FindLink(str, ref curloc);

          if(link == null) {
            Console.WriteLine("No link found.");
            break;
          }

          Console.WriteLine("Link found: " + link);

          Console.Write("Link, More, Quit?");
          string answer = Console.ReadLine();

          // A null answer means standard input was closed; treat it as Quit
          // rather than silently walking through every remaining link.
          if(answer == null ||
             string.Equals(answer, "Q", StringComparison.OrdinalIgnoreCase)) {
            break;
          }

          if(string.Equals(answer, "L", StringComparison.OrdinalIgnoreCase)) {
            uristr = link; // follow this link on the next outer iteration
            break;
          }

          if(string.Equals(answer, "M", StringComparison.OrdinalIgnoreCase)) {
            Console.WriteLine("Searching for another link.");
          }
          // Any other answer moves on to the next link without a message.
        }
      } while(uristr != null);

    } catch(WebException exc) {
      Console.WriteLine("Network Error: " + exc.Message +
                        "\nStatus code: " + exc.Status);
    } catch(ProtocolViolationException exc) {
      Console.WriteLine("Protocol Error: " + exc.Message);
    } catch(UriFormatException exc) {
      Console.WriteLine("URI Format Error: " + exc.Message);
    } catch(NotSupportedException exc) {
      Console.WriteLine("Unknown Protocol: " + exc.Message);
    } catch(IOException exc) {
      Console.WriteLine("I/O Error: " + exc.Message);
    }

    Console.WriteLine("Terminating MiniCrawler.");
  }
}
33.29.Mini Crawler
33.29.1.MiniCrawler: A skeletal Web crawler.
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.