/*
Copyright (c) 2002 Joe Gregorio
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
Credits:
Feature #569734 HTTP Redirection Header : Simon Fell
Feature #573937 If-Modified-Since & Friends : Simon Fell
Bug #574438 Proxy settings require restart to detect changes : Simon Fell
Bug#[ 573749 ] Show channel/title in the channel list
Submitted and fixed by: Eric Vitiello Jr. (presto) aka pixel.
Bug [ 575053 ] Invalid XSLT causes exception
Submitted and fixed by: Joe Gregorio
Bug [ 575134 ] Missing OPML file will crash Aggie
Submitted and fixed by: Joe Gregorio
Feature #573714 Put downloaded xml in separate directory : Simon Fell
Bug #570292 Freeze on Merge
Submitted by: Joe Gregorio, Fixed by: Simon Fell
Feature [ 582438 ] Referer Website Url: Submitted by: Joe Gregorio, Implemented by Ingve Vormestrand.
Bug [ 582050 ] Crash on missing mySubscriptions v HTTP : Found by Gordon Weakliem, Fixed by: Joe Gregorio
Bug [ 583697 ] Relative redirects not handled correctly : Found by Ingve Vormestrand, Fixed by: Simon Fell
Bug [ 596647 ] autodiscovery fails with relative URL : Found by cobra libre (jacobito), Fixed by: Simon Fell
*/
namespace Bitworking{
using System;
using System.Threading;
using System.Collections;
using System.Collections.Specialized;
using System.Xml;
using System.Xml.Xsl;
using System.Xml.XPath;
using System.Xml.Serialization;
using System.Net;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using Bitworking.Smtp;
/// <summary>
/// Holds the product identification text that Aggie reports about itself.
/// </summary>
public class VersionInfo {
    // Product name, release label and home page, in that order.
    private const string userAgent_ = "Aggie 1.0 Release Candidate 5 - http://bitworking.org";

    /// <summary>Gets the base User-Agent text for this build.</summary>
    public static string UserAgent {
        get {
            return userAgent_;
        }
    }
}
// =========================================================
// Aggie
// =========================================================
public class Aggie {
// Raised by Merge() for each successfully fetched site, just before that
// site is parsed into the aggregate document.
public event AggieSiteEvent SiteMerge ;
// Settings object that backs the public configuration properties of this class.
private AggieConfig config_;
// Sinks (enumerated as IRssSink) that Transform() notifies after writing the
// HTML output; populated by InitRssSinks().
private ArrayList rssSinks_;
/// <summary>
/// Applies the configured proxy address to GlobalProxySelection.Select.
/// A null, empty or all-whitespace setting selects the empty proxy returned
/// by GlobalProxySelection.GetEmptyWebProxy().
/// </summary>
private void InitProxy() {
    string proxyAddress = config_.proxy_;
    // Guard against an unset proxy: calling Trim() on null would throw.
    if (proxyAddress != null && proxyAddress.Trim() != "") {
        // The address is handed over untrimmed, exactly as configured.
        GlobalProxySelection.Select = new WebProxy(proxyAddress);
    } else {
        GlobalProxySelection.Select = GlobalProxySelection.GetEmptyWebProxy();
    }
}
/// <summary>
/// Creates an aggregator backed by the configuration returned from
/// AggieConfig.Create().
/// </summary>
/// <remarks>
/// Delegates to the AggieConfig overload so that config_ is always assigned
/// before InitProxy() and the WeblogUrl hand-off read from it; without a
/// configuration object those reads would dereference null.
/// </remarks>
private Aggie() : this(AggieConfig.Create()) {
}
/// <summary>
/// Creates an aggregator that reads and writes its settings through
/// <paramref name="config"/>.
/// </summary>
/// <param name="config">Configuration object kept by this instance.</param>
private Aggie(AggieConfig config) {
    // Plain field state first: empty site list, AmphetaDesk format assumed,
    // no initialisation error recorded yet.
    this.config_ = config;
    this.siteList_ = new ArrayList();
    this.isAmphetadeskFormat_ = true;
    this.InitErrorMessage_ = "";

    // Then push the configured proxy and weblog URL out to the shared
    // request settings, and set up the output sinks.
    this.InitProxy();
    AggieRequest.WeblogUrl = this.config_.weblogUrl_;
    this.InitRssSinks();
}
/// <summary>
/// Rebuilds the list of sinks that Transform() notifies.
/// </summary>
private void InitRssSinks() {
    this.rssSinks_ = new ArrayList();

    // Hardcoded for now. Need a mechanism to auto-load
    // new RssSinks in the future.
    RssSmtpSink smtpSink = new RssSmtpSink();
    this.rssSinks_.Add(smtpSink);
}
// Description of a problem hit while loading the channel list; the
// constructors reset it to "".
private string InitErrorMessage_;

/// <summary>
/// Gets the message recorded when the channel list could not be loaded
/// and the default list was used instead.
/// </summary>
public string InitErrorMessage {
    get {
        return this.InitErrorMessage_;
    }
}
/// <summary>
/// Gets or sets the channel list location stored in the configuration.
/// Init() treats a value starting with "http://" as a URL and anything
/// else as a local file name.
/// </summary>
public string siteListFileName {
    get {
        return this.config_.siteListFileName_;
    }
    set {
        this.config_.siteListFileName_ = value;
    }
}
/// <summary>
/// Gets or sets the proxy address stored in the configuration.
/// Setting it re-applies the proxy immediately through InitProxy().
/// </summary>
public string proxy {
    get {
        return this.config_.proxy_;
    }
    set {
        this.config_.proxy_ = value;
        this.InitProxy();
    }
}
/// <summary>
/// Gets or sets the skin name stored in the configuration. Transform()
/// loads its stylesheet from "templates/" + skin + "/skin.xsl".
/// </summary>
public string skin {
    get {
        return this.config_.skin_;
    }
    set {
        this.config_.skin_ = value;
    }
}
/// <summary>
/// Gets or sets the user's weblog URL stored in the configuration.
/// Setting it also forwards the value to AggieRequest.WeblogUrl.
/// </summary>
public string weblogUrl {
    get {
        return this.config_.weblogUrl_;
    }
    set {
        this.config_.weblogUrl_ = value;
        AggieRequest.WeblogUrl = value;
    }
}
/// <summary>
/// Gets or sets the maximum thread count stored in the configuration.
/// </summary>
public int MaxThreads {
    get {
        return this.config_.maxThreads_;
    }
    set {
        this.config_.maxThreads_ = value;
    }
}
/// <summary>
/// Gets the SMTP settings object held by the configuration.
/// </summary>
public SmtpConfigInfo SmtpConfigInfo {
    get {
        return this.config_.smtpConfig_;
    }
}
// We do not write Radio format, only read it.
private bool isAmphetadeskFormat_;

/// <summary>
/// Reports whether the channel list must not be written back.
/// </summary>
/// <returns>
/// true when the list was not read in AmphetaDesk format, or when its
/// configured name starts with "http://" (any letter case); otherwise false.
/// </returns>
public bool IsReadOnlySiteList() {
    if (!isAmphetadeskFormat_) {
        return true;
    }
    // Look for http:// at beginning of name
    Regex httpPrefix = new Regex("^http://", RegexOptions.IgnoreCase);
    return httpPrefix.IsMatch(config_.siteListFileName_);
}
/// <summary>
/// Gets or sets the show-old-items flag stored in the configuration;
/// Merge() passes it on when parsing each site.
/// </summary>
public bool showOldItems {
    get {
        return this.config_.showOldItems_;
    }
    set {
        this.config_.showOldItems_ = value;
    }
}
// The Site objects currently subscribed to; refilled by Init().
private ArrayList siteList_;

/// <summary>
/// Gets the live list of Site objects (the internal list itself, not a copy).
/// </summary>
public ArrayList siteList {
    get {
        return this.siteList_;
    }
}
/// <summary>
/// Checks whether a site with the given URL is already in the site list.
/// The comparison ignores letter case.
/// </summary>
/// <param name="url">URL to look for; null never matches.</param>
/// <returns>true if some site's url equals <paramref name="url"/> ignoring case.</returns>
public bool duplicateSiteURL(string url) {
    if (url == null) {
        return false;
    }
    // Lower-case with the invariant culture: URLs are identifiers, and the
    // current culture's casing rules (e.g. Turkish dotless i) would make
    // "HTTP://SITE" and "http://site" compare as different.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    string loweredUrl = url.ToLower(invariant);
    foreach (Site s in siteList_) {
        // A site without a URL cannot be a duplicate; skip it rather than throw.
        if (s.url != null && s.url.ToLower(invariant) == loweredUrl) {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Appends a site to the site list unless a site with the same URL
/// (per duplicateSiteURL) is already present.
/// </summary>
/// <param name="s">Site to add; it is only read, never reassigned.</param>
public void Add(ref Site s) {
    if (duplicateSiteURL(s.url)) {
        // Already subscribed: silently ignore the request.
        return;
    }
    siteList_.Add(s);
}
/// <summary>
/// Removes the given site from the site list via ArrayList.Remove.
/// </summary>
/// <param name="s">Site to remove.</param>
public void Remove(Site s) {
    this.siteList_.Remove(s);
}
/// <summary>
/// Tries to build an aggregator from <paramref name="config"/> and load its
/// site list.
/// </summary>
/// <param name="config">Configuration to construct the aggregator with.</param>
/// <returns>
/// The initialised aggregator, or null if construction or Init() threw.
/// </returns>
private static Aggie CreateFromInitFileImpl(ref AggieConfig config) {
    try {
        Aggie candidate = new Aggie(config);
        candidate.Init();
        return candidate;
    }
    catch (Exception) {
        // Any failure is reported to the caller as "no instance".
        return null;
    }
}
/// <summary>
/// Creates an aggregator from the configuration returned by
/// AggieConfig.Create(), falling back to the default channel list when the
/// configured one cannot be loaded, and then initialises the RSS parser
/// from "RssExtractors.xml".
/// </summary>
/// <returns>
/// The initialised aggregator. When the fallback was used, its
/// InitErrorMessage describes the channel list that failed.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// Neither the configured nor the default channel list could be loaded.
/// </exception>
/// <exception cref="System.Exception">
/// The parser configuration file could not be loaded; the cause is the
/// inner exception.
/// </exception>
public static Aggie CreateFromInitFile() {
    AggieConfig config = AggieConfig.Create();
    Aggie returnValue = CreateFromInitFileImpl(ref config);
    if (null == returnValue) {
        string oldSiteListFileName = config.siteListFileName_;
        config.RevertToDefaultChannelList();
        returnValue = CreateFromInitFileImpl(ref config);
        if (null == returnValue) {
            // The retry can fail as well; report that explicitly instead of
            // dereferencing null on the next line.
            throw new InvalidOperationException("Corrupt or missing channel list: " + oldSiteListFileName
                + ". The default channel list could not be loaded either.");
        }
        returnValue.InitErrorMessage_ = "Corrupt or missing channel list: " + oldSiteListFileName + ". Reverting to default channel list.";
    }
    // Initialize the RSS parser's static tables from external file
    string parserFile = "RssExtractors.xml";
    if ( !File.Exists( parserFile ) ) {
        // Restore the working copy from the ".orig" file next to it.
        File.Copy( parserFile + ".orig", parserFile );
    }
    try {
        RssDocument.Init( parserFile );
    }
    catch ( Exception e ) {
        // TODO: What do we do here?
        throw new Exception( "Corrupt or missing parser configuration file " + parserFile, e );
    }
    return returnValue;
}
/// <summary>
/// Persists the current settings by calling Save() on the configuration object.
/// </summary>
public void WriteInitFile() {
    this.config_.Save();
}
// Read in the list of all the sites to scan.
/// <summary>
/// Registers the custom request prefixes, then reloads the site list from
/// the configured channel list (fetched over HTTP when its name starts with
/// "http://", otherwise read from disk). Every "outline" element becomes one
/// Site.
/// </summary>
/// <returns>Always true; load failures surface as exceptions.</returns>
public bool Init() {
    try {
        GenericRequestCreator.Register( "xslt:", typeof( XsltRequest ) );
        GenericRequestCreator.Register( "RssHarvest:", typeof( RssHarvestRequest ) );
        // NOTE(review): "RssDistill:" is mapped to RssHarvestRequest as well -
        // confirm this is intentional and not a copy/paste slip.
        GenericRequestCreator.Register( "RssDistill:", typeof( RssHarvestRequest ) );
    }
    catch ( Exception e ) {
        // Failure of a protocol handler is not fatal, at least not now
        System.Diagnostics.Debug.WriteLine( "Unable to register protocol handler(s).\r\n"
            + e.Message );
    }
    siteList_.Clear();
    if (config_.siteListFileName_ == "myChannels.opml" && !File.Exists("myChannels.opml")) {
        File.Copy("myChannels.opml.orig", "myChannels.opml");
    }
    XmlDocument doc = new XmlDocument();
    Regex r = new Regex("^http://", RegexOptions.IgnoreCase);
    if (r.IsMatch(config_.siteListFileName_)) {
        WebRequest wreq = AggieRequest.CreateRequest(config_.siteListFileName_);
        WebResponse resp = wreq.GetResponse();
        try {
            doc.Load(resp.GetResponseStream());
        }
        finally {
            // Release the connection and response stream even if the
            // document fails to parse.
            resp.Close();
        }
    } else {
        if (!File.Exists(config_.siteListFileName_)) {
            InitErrorMessage_ = "Corrupt or missing channel list: " + config_.siteListFileName_ + ". Reverting to default channel list.";
            config_.siteListFileName_ = "myChannels.opml";
        }
        if (!File.Exists("myChannels.opml")) {
            File.Copy("myChannels.opml.orig", "myChannels.opml");
        }
        doc.Load(config_.siteListFileName_);
    }
    // The reader is handed to ReadFromOPML by ref, so it must stay a plain
    // local; try/finally guarantees it is closed on every path.
    XmlNodeReader reader = new XmlNodeReader(doc);
    try {
        while (reader.Read()) {
            if (reader.NodeType == XmlNodeType.Element && reader.LocalName == "outline") {
                Site s = new Site();
                s.ReadFromOPML(ref reader);
                siteList_.Add(s);
            }
        }
    }
    finally {
        if (reader != null) {
            reader.Close();
        }
    }
    // The format of the whole list is taken from its first entry; an empty
    // list counts as AmphetaDesk format.
    if (siteList_.Count > 0 && !((Site)siteList_[0]).isAmphetaDeskFormat) {
        isAmphetadeskFormat_ = false;
    } else {
        isAmphetadeskFormat_ = true;
    }
    return true;
}
/// <summary>
/// Calls Go() on every site in the site list, in list order.
/// </summary>
public void Go() {
    foreach (Site subscribedSite in this.siteList_) {
        subscribedSite.Go();
    }
}
/// <summary>
/// Rewrites "aggregated.xml": sorts the site list, then writes one entry per
/// site under an "aggregate" root. Sites whose status is Succeeded raise
/// SiteMerge and are parsed into aggregate form; every other site gets a
/// site-error entry.
/// </summary>
/// <remarks>
/// A failure while parsing a single succeeded site is logged to the debug
/// output and does not stop the merge. siteList_.Sort() presumably relies on
/// Site being comparable - verify against the Site class.
/// </remarks>
public void Merge() {
    // Internal DTD subset describing the aggregate document.
    string subset = @"
<!ELEMENT aggregate (site*) >
<!ELEMENT site ((channel, item*)?) >
<!ATTLIST site read (true|false) ""false"">
<!ELEMENT channel (title, link, description, base?, errors?, comments?) >
<!ELEMENT item (title, link, description) >
<!ELEMENT title (#PCDATA) >
<!ELEMENT link (#PCDATA) >
<!ELEMENT description (#PCDATA) >
<!ELEMENT base (#PCDATA) >
<!ELEMENT errors (#PCDATA) >
<!ELEMENT comments (strict?, loose*) >
<!ELEMENT strict (#PCDATA) >
<!ELEMENT loose (#PCDATA) >
";
    File.Delete("aggregated.xml");
    XmlTextWriter writer = new XmlTextWriter("aggregated.xml", new UTF8Encoding());
    try {
        writer.WriteStartDocument();
        writer.WriteDocType("aggregate", null, null, subset);
        // Indentation is switched on only after the prolog has been written.
        writer.Formatting = Formatting.Indented;
        writer.WriteStartElement("aggregate");
        siteList_.Sort();
        foreach (Site site in siteList_) {
            if (site.status == Bitworking.Site.State.Succeeded) {
                try {
                    if ( SiteMerge != null ) { // fire event
                        SiteMerge ( this, new SiteEventArgs ( site ) ) ;
                    }
                    site.ParseIntoAggregateForm(ref writer, config_.showOldItems_, true);
                }
                catch (System.Exception e) {
                    System.Diagnostics.Debug.WriteLine( "While parsing site " + site.filename + "..." );
                    System.Diagnostics.Debug.WriteLine( "Caught unknown exception (" + e.Message
                        + ")" );
                    System.Diagnostics.Debug.Write( e.StackTrace );
                }
            } else {
                site.WriteSiteError(ref writer);
            }
        }
        writer.WriteEndElement();
        writer.WriteEndDocument();
    }
    finally {
        // Close() is the only call in the finally block: it releases the
        // file on every path (including a failure while writing the prolog)
        // and closes any elements still open, without the risk of a second
        // exception from WriteEndElement masking the original one.
        writer.Close();
    }
}
/// <summary>
/// Renders "aggregated.xml" to "Aggie.html" with the current skin's
/// stylesheet, then notifies every registered sink.
/// </summary>
/// <param name="freshFeeds">Passed unchanged to each sink's NewAggregatedXml.</param>
public void Transform(bool freshFeeds) {
    XslTransform skinTransform = new XslTransform();
    skinTransform.Load("templates/" + config_.skin_ + "/skin.xsl");

    // Read the input before touching the old output file.
    XPathDocument aggregated = new XPathDocument("aggregated.xml");
    File.Delete("Aggie.html");
    using (StreamWriter output = new StreamWriter(File.OpenWrite("Aggie.html"))) {
        skinTransform.Transform(aggregated, null, output);
    }

    foreach (IRssSink sink in rssSinks_) {
        sink.NewAggregatedXml(freshFeeds);
    }
}
/// <summary>
/// Writes the site list back to the configured channel list file as OPML
/// (skipped when IsReadOnlySiteList() is true), then saves the HTTP cache
/// manager state.
/// </summary>
public void WriteToOPML() {
    // !!!! Once FTP is implemented we need to detect if we have a URI here for ftp.
    // If so create a local with the same name, write it, then upload it.
    // If the file name begins with http: don't do anything.
    if (!IsReadOnlySiteList()) {
        File.Delete(config_.siteListFileName_);
        XmlTextWriter writer = new XmlTextWriter(config_.siteListFileName_, new UTF8Encoding());
        try {
            writer.Formatting = Formatting.Indented;
            writer.WriteStartDocument();
            writer.WriteStartElement("opml");
            writer.WriteStartElement("body");
            foreach (Site s in siteList_) {
                s.WriteToOPML(writer);
            }
            writer.WriteEndElement();
            writer.WriteEndElement();
            writer.WriteEndDocument();
        }
        finally {
            // Release the file even if a site fails to serialise.
            writer.Close();
        }
    }
    // The cache is saved whether or not the list itself was written.
    httpCacheManager.Current.Save() ;
}
}
/// <summary>
/// Factory for the web requests Aggie issues. It applies a common timeout
/// and, for HTTP requests, a User-Agent that ends with the Aggie referrer
/// URL (optionally carrying the user's weblog URL).
/// </summary>
public class AggieRequest {
    // Timeout assigned to every request (WebRequest.Timeout is in milliseconds).
    private const int defaultTimeout_ = 60000;
    private const string aggieBase_ = @"http://bitworking.org/AggieReferrers.html";
    // Referrer URL appended to the User-Agent; replaced by the WeblogUrl setter.
    private static string referer_ = aggieBase_;
    // Fixed part of the User-Agent. The referrer is appended per request in
    // CreateRequest: folding it into this read-only field would freeze the
    // value seen at type initialisation and ignore later WeblogUrl changes.
    private static readonly string userAgentPrefix_ = VersionInfo.UserAgent
        + " (" + Environment.OSVersion.ToString() + "; .NET CLR " + Environment.Version.ToString() + ") ";

    /// <summary>
    /// Sets the user's weblog URL. A non-empty value is appended to the
    /// referrer URL as the "userWeblog" query parameter; null or empty
    /// resets the referrer to the bare Aggie URL.
    /// </summary>
    public static string WeblogUrl {
        set {
            if (null != value && String.Empty != value) {
                referer_ = aggieBase_ + @"?userWeblog=" + value;
            } else {
                referer_ = aggieBase_;
            }
        }
    }

    /// <summary>
    /// Creates a request for <paramref name="url"/> with the default timeout.
    /// HTTP requests also get the Aggie User-Agent, built from the referrer
    /// that is current at the time of the call.
    /// </summary>
    /// <param name="url">Address handed to WebRequest.Create.</param>
    /// <returns>The configured request; it has not been sent.</returns>
    public static WebRequest CreateRequest(string url) {
        WebRequest req = WebRequest.Create(url);
        req.Timeout = defaultTimeout_;
        // Only HTTP requests carry a User-Agent header.
        HttpWebRequest wreq = req as HttpWebRequest;
        if (null != wreq) {
            wreq.UserAgent = userAgentPrefix_ + referer_;
        }
        return req;
    }
}
}
|