Aggie.cs :  » RSS-RDF » Aggie » Bitworking » C# / CSharp Open Source

Home
C# / CSharp Open Source
1.2.6.4 mono .net core
2.2.6.4 mono core
3.Aspect Oriented Frameworks
4.Bloggers
5.Build Systems
6.Business Application
7.Charting Reporting Tools
8.Chat Servers
9.Code Coverage Tools
10.Content Management Systems CMS
11.CRM ERP
12.Database
13.Development
14.Email
15.Forum
16.Game
17.GIS
18.GUI
19.IDEs
20.Installers Generators
21.Inversion of Control Dependency Injection
22.Issue Tracking
23.Logging Tools
24.Message
25.Mobile
26.Network Clients
27.Network Servers
28.Office
29.PDF
30.Persistence Frameworks
31.Portals
32.Profilers
33.Project Management
34.RSS RDF
35.Rule Engines
36.Script
37.Search Engines
38.Sound Audio
39.Source Control
40.SQL Clients
41.Template Engines
42.Testing
43.UML
44.Web Frameworks
45.Web Service
46.Web Testing
47.Wiki Engines
48.Windows Presentation Foundation
49.Workflows
50.XML Parsers
C# / C Sharp
C# / C Sharp by API
C# / CSharp Tutorial
C# / CSharp Open Source » RSS RDF » Aggie 
Aggie » Bitworking » Aggie.cs
/*

  Copyright (c) 2002 Joe Gregorio

  Permission is hereby granted, free of charge, to any person obtaining 
  a copy of this software and associated documentation files (the "Software"), 
  to deal in the Software without restriction, including without limitation 
  the rights to use, copy, modify, merge, publish, distribute, sublicense, 
  and/or sell copies of the Software, and to permit persons to whom the 
  Software is furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in 
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/*
  Credits:

  Feature #569734 HTTP Redirection Header : Simon Fell
  Feature #573937 If-Modified-Since & Friends : Simon Fell
  Bug     #574438 Proxy settings require restart to detect changes : Simon Fell
  Bug#[ 573749 ] Show channel/title in the channel list
  Submitted and fixed by: Eric Vitiello Jr. (presto) aka pixel.
  Bug [ 575053 ] Invalid XSLT causes exception
  Submitted and fixed by: Joe Gregorio
  Bug [ 575134 ] Missing OPML file will crash Aggie
  Submitted and fixed by: Joe Gregorio
  Feature #573714 Put downloaded xml in separate directory : Simon Fell
  Bug #570292 Freeze on Merge 
  Submitted by: Joe Gregorio, Fixed by: Simon Fell
  Feature [ 582438 ] Referer Website Url: Submitted by: Joe Gregorio, Implemented by Ingve Vormestrand.
  Bug [ 582050 ] Crash on missing mySubscriptions v HTTP : Found by Gordon Weakliem, Fixed by: Joe Gregorio
  Bug [ 583697 ] Relative redirects not handled correctly : Found by Ingve Vormestrand, Fixed by: Simon Fell
  Bug [ 596647 ] autodiscovery fails with relative URL : Found by cobra libre (jacobito), Fixed by: Simon Fell
 */
namespace Bitworking{
  
  using System;
  using System.Threading;
  using System.Collections;
  using System.Collections.Specialized;
  using System.Xml;
  using System.Xml.Xsl;
  using System.Xml.XPath;
  using System.Xml.Serialization;
  using System.Net;
  using System.IO;
  using System.Text;
  using System.Text.RegularExpressions;
  using Bitworking.Smtp;


  // Holds the product identification string for this build of Aggie.
  public class VersionInfo {
    private const string userAgent_ = "Aggie 1.0 Release Candidate 5 - http://bitworking.org";

    // Product name, version and home page, used as the leading part of
    // the HTTP User-Agent string built by AggieRequest.
    public static string UserAgent {
      get {
        return userAgent_;
      }
    }
  }
  
  // =========================================================
  //                        Aggie
  // =========================================================
  public class Aggie {
    // Raised by Merge() for each site whose status is Succeeded, just
    // before that site is parsed into the aggregate document.
    public event AggieSiteEvent SiteMerge ;
          
    // Settings object backing the public properties of this class
    // (proxy, skin, channel list file name, ...).
    private AggieConfig config_;
    // Sinks (IRssSink) that Transform() notifies after writing Aggie.html.
    private ArrayList rssSinks_;

    // Applies the configured proxy to GlobalProxySelection. A setting that
    // is empty after trimming selects the empty web proxy instead.
    private void InitProxy() {
      bool proxyConfigured = config_.proxy_.Trim().Length > 0;
      if (!proxyConfigured) {
        GlobalProxySelection.Select = GlobalProxySelection.GetEmptyWebProxy();
        return;
      }
      GlobalProxySelection.Select = new WebProxy(config_.proxy_);
    }

    // Creates an Aggie instance from the configuration returned by
    // AggieConfig.Create().
    //
    // The previous body never assigned config_, so InitProxy() failed with
    // a NullReferenceException as soon as this constructor ran. Delegating
    // to the AggieConfig overload gives config_ a value and keeps all of
    // the initialisation steps in one place.
    private Aggie() : this(AggieConfig.Create()) {
    }

    // Creates an Aggie instance bound to the given configuration.
    // Starts with an empty site list and no init error message, applies the
    // proxy setting, hands the weblog URL to AggieRequest and creates the
    // RSS sinks.
    private Aggie(AggieConfig config) {
      this.config_ = config;
      this.InitErrorMessage_ = "";
      this.isAmphetadeskFormat_ = true;
      this.siteList_ = new ArrayList();

      this.InitProxy();
      AggieRequest.WeblogUrl = this.config_.weblogUrl_;
      this.InitRssSinks();
    }

    // Builds the list of RSS sinks that Transform() notifies.
    // The set of sinks is hardcoded for now; a mechanism to auto-load new
    // RssSinks is still needed.
    private void InitRssSinks() {
      ArrayList sinks = new ArrayList();
      rssSinks_ = sinks;
      sinks.Add(new RssSmtpSink());
    }
    

    // Text describing a problem found while loading the channel list;
    // the constructors set it to the empty string.
    private string InitErrorMessage_;

    // Read-only view of the initialisation error text.
    public string InitErrorMessage {
      get {
        return this.InitErrorMessage_;
      }
    }
  
    // Name (local path or http:// URL) of the channel list, stored in the
    // configuration object.
    public string siteListFileName {
      get {
        return this.config_.siteListFileName_;
      }
      set {
        this.config_.siteListFileName_ = value;
      }
    }

    // Proxy setting stored in the configuration object. Assigning a new
    // value re-runs InitProxy() so the change takes effect immediately.
    public string proxy {
      get {
        return this.config_.proxy_;
      }
      set {
        this.config_.proxy_ = value;
        this.InitProxy();
      }
    }

    // Name of the skin; Transform() loads "templates/<skin>/skin.xsl".
    public string skin {
      get {
        return this.config_.skin_;
      }
      set {
        this.config_.skin_ = value;
      }
    }

    // Weblog URL stored in the configuration object. Setting it also
    // forwards the new value to AggieRequest.WeblogUrl.
    public string weblogUrl {
      get {
        return this.config_.weblogUrl_;
      }
      set {
        string newUrl = value;
        this.config_.weblogUrl_ = newUrl;
        AggieRequest.WeblogUrl = newUrl;
      }
    }

    // Maximum-thread setting stored in the configuration object.
    public int MaxThreads {
      get {
        return this.config_.maxThreads_;
      }
      set {
        this.config_.maxThreads_ = value;
      }
    }

    // SMTP settings held by the configuration object (read-only accessor).
    public SmtpConfigInfo SmtpConfigInfo {
      get {
        return this.config_.smtpConfig_;
      }
    }

    // True when the loaded channel list is in AmphetaDesk format.
    // We do not write Radio format, only read it.
    private bool isAmphetadeskFormat_;

    // Reports whether the channel list must not be written back: either it
    // is not in AmphetaDesk format, or its name starts with "http://"
    // (matched case-insensitively).
    public bool IsReadOnlySiteList() {
      if (!isAmphetadeskFormat_) {
        return true;
      }
      return Regex.IsMatch(config_.siteListFileName_, "^http://", RegexOptions.IgnoreCase);
    }

    // Flag stored in the configuration object; Merge() passes it to
    // Site.ParseIntoAggregateForm for every successfully fetched site.
    public bool showOldItems {
      get {
        return this.config_.showOldItems_;
      }
      set {
        this.config_.showOldItems_ = value;
      }
    }
    
    

    // The Site objects read from the channel list or added via Add().
    private ArrayList siteList_;

    // Exposes the live list of sites (not a copy).
    public ArrayList siteList {
      get {
        return this.siteList_;
      }
    }

    // Returns true when a site with the same URL, compared after
    // lower-casing both sides, is already present in the site list.
    public bool duplicateSiteURL(string url) {
      string wanted = url.ToLower();
      foreach (Site existing in siteList_) {
        if (existing.url.ToLower() == wanted) {
          return true;
        }
      }
      return false;
    }

    // Appends the site to the list unless a site with the same URL
    // (see duplicateSiteURL) is already there.
    public void Add(ref Site s)  {
      if (duplicateSiteURL(s.url)) {
        return;
      }
      siteList_.Add(s);
    }

    // Removes the given site from the site list.
    public void Remove(Site s)  {
      this.siteList_.Remove(s);
    }

    // Tries to build an Aggie instance from the given configuration and to
    // load its channel list via Init().
    //
    // Returns the initialised instance, or null when construction or
    // Init() throws. The caller uses null as the signal to fall back to
    // the default channel list, so the exception is deliberately not
    // rethrown; it is written to the debug output so the cause of the
    // fallback is not lost.
    private static Aggie CreateFromInitFileImpl(ref AggieConfig config) {
      try {
        Aggie candidate = new Aggie(config);
        candidate.Init();
        return candidate;
      }
      catch (Exception e) {
        System.Diagnostics.Debug.WriteLine( "Unable to initialize Aggie from its configuration.\r\n"
          + e.Message );
        return null;
      }
    }

    // Creates the Aggie instance from the stored configuration.
    //
    // If the configured channel list cannot be loaded, the configuration
    // is reverted to the default channel list and loading is retried; on
    // success InitErrorMessage explains what happened. Afterwards the RSS
    // parser's static tables are initialised from "RssExtractors.xml",
    // which is restored from "RssExtractors.xml.orig" when missing.
    //
    // Throws InvalidOperationException when the default channel list
    // cannot be loaded either (previously this surfaced as a
    // NullReferenceException), and Exception when the parser configuration
    // file cannot be loaded.
    public static Aggie CreateFromInitFile() {
      AggieConfig config = AggieConfig.Create();
      Aggie returnValue = CreateFromInitFileImpl(ref config);
      if (null == returnValue) {
        string oldSiteListFileName = config.siteListFileName_;
        config.RevertToDefaultChannelList();

        returnValue = CreateFromInitFileImpl(ref config);
        if (null == returnValue) {
          // The fallback failed as well; report that instead of
          // dereferencing null below.
          throw new InvalidOperationException( "Corrupt or missing channel list: " + oldSiteListFileName
            + ". The default channel list could not be loaded either." );
        }
        returnValue.InitErrorMessage_ = "Corrupt or missing channel list: " + oldSiteListFileName + ". Reverting to default channel list.";
      }
      
      // Initialize the RSS parser's static tables from external file
      string parserFile = "RssExtractors.xml";
      if ( !File.Exists( parserFile ) ) {
        File.Copy( parserFile + ".orig", parserFile );
      }
      try {
        RssDocument.Init( parserFile );
      }
      catch ( Exception e ) {
        // TODO: What do we do here? For now the failure is wrapped and
        // passed on to the caller, which may want to show it to the user.
        throw new Exception( "Corrupt or missing parser configuration file " + parserFile, e );
      }
      return returnValue;
    }

    // Persists the current configuration by calling Save() on it.
    public void WriteInitFile() {
      this.config_.Save();
    }

    // Read in the list of all the sites to scan.
    //
    // Registers the custom request protocols, clears the site list and
    // loads the channel list (OPML) named by the configuration, either
    // over HTTP when the name starts with "http://" or from a local file.
    // Every <outline> element becomes one Site. The format flag is taken
    // from the first site read.
    //
    // Returns true on success; failures to fetch or parse the channel list
    // are reported through exceptions.
    public bool Init() {
      try {
        GenericRequestCreator.Register( "xslt:", typeof( XsltRequest ) );
        GenericRequestCreator.Register( "RssHarvest:", typeof( RssHarvestRequest ) );
        // NOTE(review): "RssDistill:" is registered with RssHarvestRequest as
        // well - confirm that this is intended.
        GenericRequestCreator.Register( "RssDistill:", typeof( RssHarvestRequest ) );
      }
      catch ( Exception e ) {
        // Failure of a protocol handler is not fatal, at least not now
        System.Diagnostics.Debug.WriteLine( "Unable to register protocol handler(s).\r\n"
          + e.Message );
      }

      siteList_.Clear();
      if (config_.siteListFileName_ == "myChannels.opml" && !File.Exists("myChannels.opml")) {
        File.Copy("myChannels.opml.orig", "myChannels.opml");
      }

      XmlDocument doc = new XmlDocument();
      Regex r = new Regex("^http://", RegexOptions.IgnoreCase);
      if (r.IsMatch(config_.siteListFileName_)) {

        WebRequest wreq = AggieRequest.CreateRequest(config_.siteListFileName_);
        WebResponse resp = wreq.GetResponse();
        try {
          doc.Load(resp.GetResponseStream());
        }
        finally {
          // Release the response (and its stream) even when the document
          // cannot be parsed.
          resp.Close();
        }

      } else {
        if (!File.Exists(config_.siteListFileName_)) {
          InitErrorMessage_ = "Corrupt or missing channel list: " + config_.siteListFileName_ + ". Reverting to default channel list.";
          config_.siteListFileName_ = "myChannels.opml";
        }
        if (!File.Exists("myChannels.opml")) {
          File.Copy("myChannels.opml.orig", "myChannels.opml");
        }

        doc.Load(config_.siteListFileName_);
      }

      XmlNodeReader reader = new XmlNodeReader(doc);
      try {
        while (reader.Read()) {
          if (reader.NodeType == XmlNodeType.Element && reader.LocalName == "outline") {
            Site s = new Site();
            s.ReadFromOPML(ref reader);
            siteList_.Add(s);
          }
        }
      }
      finally {
        // reader is handed out by ref above, so guard against null before
        // closing it.
        if (reader != null) {
          reader.Close();
        }
      }

      // An empty list counts as AmphetaDesk format; otherwise the first
      // site decides.
      isAmphetadeskFormat_ = (siteList_.Count == 0) || ((Site)siteList_[0]).isAmphetaDeskFormat;

      return true;
    }

    // Calls Go() on every site in the list, in list order.
    public void Go() {
      IEnumerator sites = siteList_.GetEnumerator();
      while (sites.MoveNext()) {
        Site current = (Site)sites.Current;
        current.Go();
      }
    }
  
    // Rewrites "aggregated.xml" from the current site list.
    //
    // The site list is sorted in place. For each site whose status is
    // Succeeded the SiteMerge event is raised and the site is parsed into
    // the aggregate document; an exception from that step is written to
    // the debug output and the site is skipped. Every other site
    // contributes its error entry.
    //
    // The writer is closed on every path: the closing tags are written in
    // an inner finally, and Close() runs in an outer finally so a failure
    // while writing the header or the closing tags cannot leak the file
    // handle.
    public void Merge() {
      File.Delete("aggregated.xml");
      string subset = @"
 <!ELEMENT aggregate (site*) >
 <!ELEMENT site ((channel, item*)?) >
 <!ATTLIST site read (true|false) ""false"">
 <!ELEMENT channel (title, link, description, base?, errors?, comments?) >
 <!ELEMENT item (title, link, description) >
 <!ELEMENT title (#PCDATA) >
 <!ELEMENT link (#PCDATA) >
 <!ELEMENT description (#PCDATA) >
 <!ELEMENT base (#PCDATA) >
 <!ELEMENT errors (#PCDATA) >
 <!ELEMENT comments (strict?, loose*) >
 <!ELEMENT strict (#PCDATA) >
 <!ELEMENT loose (#PCDATA) >
";
      XmlTextWriter writer = new XmlTextWriter("aggregated.xml", new UTF8Encoding());
      try {
        writer.WriteStartDocument();
        writer.WriteDocType("aggregate", null, null, subset);
        writer.Formatting = Formatting.Indented;

        writer.WriteStartElement("aggregate");
        try {
          siteList_.Sort();

          foreach (Site site in siteList_) {
            if (site.status == Bitworking.Site.State.Succeeded) {
              try {
                if ( SiteMerge != null ) { // fire event
                  SiteMerge ( this, new SiteEventArgs ( site ) ) ;
                }
                site.ParseIntoAggregateForm(ref writer, config_.showOldItems_, true);
              }
              catch (System.Exception e) {
                // One bad feed must not abort the whole merge.
                System.Diagnostics.Debug.WriteLine( "While parsing site " + site.filename + "..." );
                System.Diagnostics.Debug.WriteLine( "Caught unknown exception (" + e.Message 
                  + ")" );
                System.Diagnostics.Debug.Write( e.StackTrace );
              }
            } else {
              site.WriteSiteError(ref writer);
            }
          }
        }
        finally {
          // Terminate the document even when a site failed.
          writer.WriteEndElement();
          writer.WriteEndDocument();
        }
      }
      finally {
        writer.Close();
      }
    }



    // Renders "aggregated.xml" to "Aggie.html" with the stylesheet of the
    // configured skin ("templates/<skin>/skin.xsl"), then tells every RSS
    // sink that new aggregated XML is available, passing freshFeeds along.
    public void Transform(bool freshFeeds) {
      string stylesheetPath = "templates/" + config_.skin_ + "/skin.xsl";
      XslTransform transformer = new XslTransform();
      transformer.Load(stylesheetPath);

      // The input document is opened before the old output is deleted.
      XPathDocument aggregated = new XPathDocument("aggregated.xml");
      File.Delete("Aggie.html");
      using (StreamWriter html = new StreamWriter(File.OpenWrite("Aggie.html"))) {
        transformer.Transform(aggregated, null, html);
      }

      IEnumerator sinks = rssSinks_.GetEnumerator();
      while (sinks.MoveNext()) {
        IRssSink sink = (IRssSink)sinks.Current;
        sink.NewAggregatedXml(freshFeeds);
      }
    }

    // Writes the site list back to the channel list file as OPML, unless
    // the list is read-only (see IsReadOnlySiteList), and then saves the
    // HTTP cache manager state.
    //
    // The writer is closed in a finally block so a failure while a site is
    // being written does not leave the channel list file locked.
    public void WriteToOPML() {
      // !!!! Once FTP is implemented we need to detect if we have a URI here for ftp.
      // If so create a local with the same name, write it, then upload it.

      // If the file name begins with http: don't do anything.
      if (!IsReadOnlySiteList()) {
        File.Delete(config_.siteListFileName_);
        XmlTextWriter writer = new XmlTextWriter(config_.siteListFileName_, new UTF8Encoding());
        try {
          writer.Formatting = Formatting.Indented;

          writer.WriteStartDocument();
          writer.WriteStartElement("opml");
          writer.WriteStartElement("body");

          foreach (Site s in siteList_) {
            s.WriteToOPML(writer);
          }

          writer.WriteEndElement();
          writer.WriteEndElement();
          writer.WriteEndDocument();
        }
        finally {
          writer.Close();
        }
      }
      
      httpCacheManager.Current.Save() ;
    }
  }
  

  // Factory for the web requests Aggie issues. Applies the default timeout
  // and, for HTTP requests, a User-Agent string that ends with the Aggie
  // referrer URL.
  public class AggieRequest {
    private const int defaultTimeout_ = 60000;
    private const string aggieBase_ = @"http://bitworking.org/AggieReferrers.html";
    private static string referer_ = aggieBase_;

    // Fixed part of the User-Agent. The referrer URL is appended per
    // request: the previous code baked it into a static readonly string at
    // type initialisation, so later changes to WeblogUrl never reached
    // outgoing requests.
    private static readonly string userAgentPrefix_ = VersionInfo.UserAgent
      + " (" + Environment.OSVersion.ToString() + "; .NET CLR " + Environment.Version.ToString() + ") ";
    
    // Sets the user's weblog URL. A null or empty value resets the
    // referrer to the plain Aggie referrer page; any other value is
    // appended to it as the "userWeblog" query parameter.
    public static string WeblogUrl {
      set { 
        if (null != value && value.Length > 0) {
          referer_ = aggieBase_ + @"?userWeblog=" + value; 
        } else {
          referer_ = aggieBase_;
        }
      }
    }

    // Creates a request for the given URL with the default timeout.
    // When the request is an HttpWebRequest its User-Agent is set to the
    // Aggie product string followed by the current referrer URL.
    public static WebRequest CreateRequest(string url) {
      WebRequest req = WebRequest.Create(url);
      req.Timeout = defaultTimeout_;

      HttpWebRequest wreq = req as HttpWebRequest;
      if (null != wreq) {
        wreq.UserAgent = userAgentPrefix_ + referer_;
      }
      return req;
    }      
  }
}



www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.