using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Xml;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using System.Net;
namespace iReaper.IndexBuilder.WWESource{
public class RawIndexParseTTask : ITask
{
// Created in Invoke, which blocks on it; a worker in ProcessOneEventItem sets it when the
// queue is empty and iCountInWeek has run down to zero.
System.Threading.ManualResetEvent _weekCompleteSignal;
// Number of queued items that are not finished yet. Invoke initialises it from the queue
// length; ProcessOneEventItem lowers it with Interlocked.Decrement.
int iCountInWeek;
#region ITask Members
/// <summary>
/// Performs no work; this task has nothing to set up before <see cref="Invoke"/> runs.
/// </summary>
public void Initialize() { }
/// <summary>
/// Loads <c>searchResult.xml</c> from the folder named by the <c>TempFolder</c> app setting,
/// queues every entry that has no download URL, lets ten thread-pool workers process the
/// queue, waits for them to signal completion and then overwrites the same file with the
/// (possibly updated) entries.
/// </summary>
public void Invoke()
{
    string tempFolder = System.Configuration.ConfigurationSettings.AppSettings["TempFolder"];
    var xml = new System.Xml.Serialization.XmlSerializer(typeof(EventItem[]));

    EventItem[] items;
    using (FileStream input = File.OpenRead(Path.Combine(tempFolder, "searchResult.xml")))
    {
        items = (EventItem[])xml.Deserialize(input);
    }

    // Only entries that still lack a download URL are handed to the workers.
    for (int index = 0; index < items.Length; index++)
    {
        EventItem candidate = items[index];
        if (!string.IsNullOrEmpty(candidate.DownloadUrl))
        {
            continue;
        }
        _inputQueue.Enqueue(candidate);
    }

    _weekCompleteSignal = new System.Threading.ManualResetEvent(false);
    iCountInWeek = _inputQueue.Count;

    // Start ten pool work items that drain the queue concurrently.
    var worker = new System.Threading.WaitCallback(ProcessOneEventItem);
    int workersToStart = 10;
    while (workersToStart > 0)
    {
        System.Threading.ThreadPool.QueueUserWorkItem(worker);
        workersToStart--;
    }

    // Block until a worker reports that every queued item has been handled.
    _weekCompleteSignal.WaitOne();

    // FileMode.Create replaces the previous file content.
    using (FileStream output = new FileStream(Path.Combine(tempFolder, "searchResult.xml"), FileMode.Create))
    {
        xml.Serialize(output, items);
    }
}
/// <summary>
/// Thread-pool worker. Repeatedly takes an item from the input queue, requests its
/// registration page, confirms the registration when a Confirm button is present, and
/// stores the registration id and download URL found on the resulting page on the item.
/// </summary>
/// <param name="obj">State argument required by <c>WaitCallback</c>; not used.</param>
/// <remarks>
/// A <see cref="System.Net.WebException"/> makes the worker try the same item again, up to
/// three attempts per item; any other exception abandons the item. Every item lowers
/// <c>iCountInWeek</c> exactly once, whether it succeeded or was abandoned, so the
/// completion signal cannot be missed because of a double decrement.
/// </remarks>
private void ProcessOneEventItem(object obj)
{
    // NOTE(review): account credentials are hard-coded in source; they should be moved to
    // configuration or a secret store.
    MSAwareWebClient client = new MSAwareWebClient("daiyue@ireaper.info", "unite1394!@#");
    EventItem item = null;
    bool retry = false;
    int retryCount = 0;
    while (true)
    {
        if (!retry)
        {
            // Every newly dequeued item starts with a full retry budget.
            retryCount = 0;
            lock (_inputQueue)
            {
                if (_inputQueue.Count > 0)
                {
                    item = _inputQueue.Dequeue();
                }
                else
                {
                    // Nothing left to take. If no item is still in flight, release Invoke.
                    // "<= 0" rather than "== 0" so a miscount can never block completion.
                    if (iCountInWeek <= 0)
                    {
                        _weekCompleteSignal.Set();
                    }
                    return;
                }
            }
        }

        bool succeeded = false;
        try
        {
            string regURL = string.Format("https://msevents.microsoft.com/cui/Register.aspx?EventID={0}&culture=en-US&countryCode=US&IsRedirect=false", item.EventID);
            string content = client.DownloadString(regURL);
            HtmlAgilityPack.HtmlDocument htmldoc = new HtmlDocument();
            htmldoc.LoadHtml(content);

            // When the page asks for confirmation, submit the first form and continue
            // with the page returned by that submission.
            if (htmldoc.DocumentNode.SelectSingleNode("//input[@type='submit' and @value='Confirm']") != null)
            {
                FormParser parser = new FormParser(content);
                var form = parser.Forms[0];
                // Post back to the registration URL itself rather than the form's own action.
                form.ActionUrl = regURL;
                var content2 = form.Submit(client, "Confirm");
                htmldoc = new HtmlDocument();
                htmldoc.LoadHtml(content2);
            }

            var nodeView = htmldoc.DocumentNode.SelectSingleNode("//a[text()='ViewOnline']");
            if (nodeView != null)
            {
                string url = nodeView.Attributes["href"].Value;
                // A missing hdnRegID input or attribute surfaces as an exception here and
                // is handled by the generic catch below (the item is abandoned).
                var nodeRegId = htmldoc.DocumentNode.SelectSingleNode("//input[@type='hidden' and @id='hdnRegID']");
                item.RegistrationId = nodeRegId.Attributes["value"].Value;
                item.DownloadUrl = ParseRAWLMUrl(url, item.RegistrationId);
            }

            succeeded = true;
            retry = false;
        }
        catch (System.Net.WebException)
        {
            Console.WriteLine("Try again : detail url:{0}", item.DetailUrl);
            retryCount++;
            // Retry the same item until three attempts have failed.
            retry = retryCount < 3;
        }
        catch (System.Exception ex)
        {
            // Any non-network failure: give up on this item and move on to the next.
            retry = false;
            Console.WriteLine(ex.Message);
        }

        if (retry)
        {
            continue;
        }

        // The item is finished (processed or abandoned): account for it exactly once.
        int remaining = System.Threading.Interlocked.Decrement(ref iCountInWeek);
        if (succeeded)
        {
            var handler = OnProgress;
            if (handler != null)
            {
                try
                {
                    handler(this, ProgressEventArgs.EventArgs(remaining));
                }
                catch (System.Exception ex)
                {
                    // A failing subscriber must not disturb the bookkeeping or kill the worker.
                    Console.WriteLine(ex.Message);
                }
            }
        }
    }
}
/// <summary>
/// Turns the link found on the registration page into a direct recording URL.
/// </summary>
/// <param name="LMStr">
/// Link to parse: an http(s) URL carrying <c>id</c> and <c>pw</c> query parameters, or a
/// <c>javascript:CheckFormat(...)</c> pseudo-URL.
/// </param>
/// <param name="RegistrationID">
/// Registration id inserted into the joinmeeting.aspx request that is issued for the
/// javascript form.
/// </param>
/// <returns>
/// <c>scheme://host/path?id=..&amp;pw=..&amp;fmt=dll</c> when both parameters are present;
/// an empty string when the input is null/empty, is not an absolute URI, or is an http(s)
/// URL without both parameters; <c>null</c> for any other scheme and for a javascript link
/// that yields no request.
/// </returns>
private string ParseRAWLMUrl(string LMStr, string RegistrationID)
{
    if (string.IsNullOrEmpty(LMStr))
    {
        return string.Empty;
    }
    Uri uri;
    if (!Uri.TryCreate(LMStr, UriKind.Absolute, out uri))
    {
        return string.Empty;
    }
    switch (uri.Scheme)
    {
        case "http":
        case "https":
            var queries = System.Web.HttpUtility.ParseQueryString(uri.Query);
            string idValue = null, pwValue = null;
            foreach (string key in queries.Keys)
            {
                // string.Equals tolerates the null key produced by a parameter without
                // "=" (e.g. "?flag"), and the ordinal comparison is culture-independent.
                if (string.Equals(key, "id", StringComparison.OrdinalIgnoreCase))
                {
                    idValue = queries[key];
                }
                else if (string.Equals(key, "pw", StringComparison.OrdinalIgnoreCase))
                {
                    pwValue = queries[key];
                }
            }
            // Both id and pw are required to build the download URL.
            if (idValue != null && pwValue != null)
            {
                return string.Format("{0}://{1}{2}?id={3}&pw={4}&fmt=dll",
                    uri.Scheme,
                    uri.Host,
                    uri.AbsolutePath,
                    idValue,
                    pwValue);
            }
            else
            {
                return "";
            }
        case "javascript":
            string path = uri.AbsolutePath;
            if (path.StartsWith("CheckFormat(", StringComparison.Ordinal))
            {
                int start = "CheckFormat(".Length;
                // Keep everything after "CheckFormat(" except the final character
                // (presumably the closing parenthesis).
                int end = path.Length - 2;
                if (end > start)
                {
                    string parameters = path.Substring(start, end - start + 1);
                    string[] values = parameters.Split(',');
                    // The first argument is skipped; the index of the first "true"
                    // argument is sent as RecordingFmt.
                    for (int i = 1; i < values.Length; i++)
                    {
                        if (values[i] == "true")
                        {
                            // Build JoinMeeting uri
                            string url = string.Format("https://msevents.microsoft.com/cui/joinmeeting.aspx?RegistrationID={0}&RecordingFmt={1}&Culture=en-us",
                                RegistrationID,
                                i);
                            // Ask only for headers and do not follow the redirect: the
                            // Location header carries the actual meeting URL.
                            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                            request.AllowAutoRedirect = false;
                            request.Method = "HEAD";
                            string responseUrl;
                            using (WebResponse response = request.GetResponse())
                            {
                                responseUrl = response.Headers[HttpResponseHeader.Location];
                            }
                            return ParseRAWLMUrl(responseUrl, RegistrationID);
                        }
                    }
                }
            }
            break;
    }
    return null;
}
/// <summary>
/// Extracts the GUID-length value that follows <c>wizid=</c> (matched case-insensitively)
/// in a URL.
/// </summary>
/// <param name="responseUrl">URL text to search; may be null or empty.</param>
/// <returns>
/// The characters following <c>wizid=</c>, as many as a formatted <see cref="Guid"/> has;
/// an empty string when the input is null/empty, the marker is absent, or fewer
/// characters than that remain after it.
/// </returns>
private string GetWizIdFromUrl(string responseUrl)
{
    const string marker = "wizid=";
    if (string.IsNullOrEmpty(responseUrl))
    {
        return string.Empty;
    }

    // Ordinal comparison: the marker is a fixed ASCII token, not linguistic text.
    int markerIndex = responseUrl.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
    if (markerIndex < 0)
    {
        return string.Empty;
    }

    int valueStart = markerIndex + marker.Length;
    int guidLength = Guid.Empty.ToString().Length;
    if (valueStart + guidLength > responseUrl.Length)
    {
        // Not enough characters left for a complete id.
        return string.Empty;
    }
    return responseUrl.Substring(valueStart, guidLength);
}
// Items that still need a download URL. Invoke fills the queue; ProcessOneEventItem
// dequeues from it while holding lock (_inputQueue).
private Queue<EventItem> _inputQueue = new Queue<EventItem>();
// Pattern for text starting with "Presenter:" followed by repeated comma-separated
// Name / Title groups. No member visible in this file section references it.
const string regex = "Presenter:\\s+((?<Name>[\\w ]+)\\s*,(?<Title>[\\w ]+\\s*,[\\w ]+)\\s*,?)*";
// Options used to build the Regex below: pattern whitespace ignored, single-line,
// explicit (named) captures only, and multi-line anchors.
static System.Text.RegularExpressions.RegexOptions options = (((System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace | System.Text.RegularExpressions.RegexOptions.Singleline)
| System.Text.RegularExpressions.RegexOptions.ExplicitCapture)
| System.Text.RegularExpressions.RegexOptions.Multiline);
// Shared Regex instance built from the pattern and options above.
static System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(regex, options);
// Raised by ProcessOneEventItem after an item has been processed without an exception;
// the event args are built from the decremented item counter.
public event EventHandler<ProgressEventArgs> OnProgress;
#endregion
}
}
|