using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Collections.Specialized;
namespace iReaper.IndexBuilder{
/// <summary>
/// Collects the POST forms of an HTML document, parsed with HtmlAgilityPack,
/// and exposes each one as a <see cref="PostForm"/> that can be submitted.
/// </summary>
/// <remarks>
/// Only <c>&lt;input&gt;</c> elements are collected; other controls such as
/// <c>&lt;select&gt;</c> or <c>&lt;textarea&gt;</c> are not read by this class.
/// </remarks>
public class FormParser
{
    HtmlAgilityPack.HtmlDocument document;

    // Starts out empty so that Forms is never null, even when no POST form exists.
    PostForm[] _forms = new PostForm[0];

    Dictionary<string, PostForm> _tables;

    /// <summary>
    /// Parses the given markup and collects its POST forms.
    /// </summary>
    /// <param name="Html">The HTML text to load into a new HtmlAgilityPack document.</param>
    public FormParser(string Html)
    {
        document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(Html);
        _tables = new Dictionary<string, PostForm>();
        Analysis();
    }

    /// <summary>
    /// Collects the POST forms of an already loaded document.
    /// </summary>
    /// <param name="htmldoc">The document to inspect.</param>
    /// <exception cref="ArgumentNullException"><paramref name="htmldoc"/> is null.</exception>
    public FormParser(HtmlAgilityPack.HtmlDocument htmldoc)
    {
        if (htmldoc == null)
        {
            throw new ArgumentNullException("htmldoc");
        }

        document = htmldoc;
        _tables = new Dictionary<string, PostForm>();
        Analysis();
    }

    /// <summary>
    /// Every POST form that was found, in the order the XPath query returned them.
    /// Empty (never null) when the document contains no POST form.
    /// </summary>
    public PostForm[] Forms
    {
        get { return _forms; }
    }

    /// <summary>
    /// The found forms keyed by <see cref="PostForm.Name"/>. Forms that have neither
    /// a name nor an id are not listed here, and when several forms share a name only
    /// the first one is listed; all of them remain available through <see cref="Forms"/>.
    /// </summary>
    public Dictionary<string, PostForm> Dictionary
    {
        get { return _tables; }
    }

    // Finds every <form> whose method is "post" and fills _forms and _tables.
    private void Analysis()
    {
        // The method value is compared in code rather than in the XPath query because
        // an XPath string comparison is case-sensitive and would miss method="POST".
        var nodes = document.DocumentNode.SelectNodes("//form");
        if (nodes == null)
        {
            return;
        }

        List<PostForm> forms = new List<PostForm>();
        foreach (var node in nodes)
        {
            string method = (GetAttribute(node, "method") ?? "").Trim();
            if (!string.Equals(method, "post", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var form = ParseNode(node);
            forms.Add(form);

            // Dictionary.Add throws on a null or duplicate key, so unnamed forms are
            // skipped and the first form of a given name wins.
            if (form.Name != null && !_tables.ContainsKey(form.Name))
            {
                _tables.Add(form.Name, form);
            }
        }
        _forms = forms.ToArray();
    }

    // Converts one <form> node into a PostForm holding its URL-encoded input terms.
    private PostForm ParseNode(HtmlAgilityPack.HtmlNode node)
    {
        // The form is identified by its name, falling back to its id; null when it has neither.
        string formName = GetAttribute(node, "name") ?? GetAttribute(node, "id");

        // A missing action is kept as an empty string instead of crashing; the caller
        // can supply the real target through PostForm.ActionUrl.
        string url = GetAttribute(node, "action") ?? "";

        List<KeyValuePair<string, string>> postDatas = new List<KeyValuePair<string, string>>();

        // NOTE(review): HtmlAgilityPack can treat <form> as an empty/overlapping element,
        // in which case the inputs are not children of the form node and nothing is found
        // here -- confirm that HtmlNode.ElementsFlags is configured for "form" before parsing.
        var inputs = node.SelectNodes(".//input");
        if (inputs != null)
        {
            foreach (var input in inputs)
            {
                string name = GetAttribute(input, "name") ?? GetAttribute(input, "id");
                if (name == null)
                {
                    // Nothing to use as the key of a "name=value" term.
                    continue;
                }

                string value = GetAttribute(input, "value") ?? "";

                // An input without a type attribute is handled as an ordinary text field.
                string type = GetAttribute(input, "type") ?? "text";

                string term = System.Web.HttpUtility.UrlEncode(name)
                    + "="
                    + System.Web.HttpUtility.UrlEncode(value);

                // PostForm expects the pair as (input type, "name=value").
                postDatas.Add(new KeyValuePair<string, string>(type, term));
            }
        }

        return new PostForm(formName, url, postDatas);
    }

    // Returns the value of the named attribute, or null when the node does not have it.
    private static string GetAttribute(HtmlAgilityPack.HtmlNode node, string attributeName)
    {
        var attribute = node.Attributes[attributeName];
        return attribute == null ? null : attribute.Value;
    }
}
/// <summary>
/// Represents the submittable content of a single HTML form. Only the POST
/// method is supported.
/// </summary>
public class PostForm
{
    private string _name;
    private string _actionUrl;

    // "name=value" terms of the ordinary inputs; always sent.
    private List<string> _inputsList;

    // "name=value" terms of the submit/button inputs, keyed by the text after '='.
    private Dictionary<string, string> _submitTable;

    // Key of the first submit term seen. Remembered explicitly because the
    // enumeration order of a Dictionary is not defined.
    private string _defaultSubmitKey;

    /// <summary>
    /// Creates a form from its name, target URL and input terms.
    /// </summary>
    /// <param name="name">The name (or id) of the form element.</param>
    /// <param name="actionUrl">The URL the form is posted to.</param>
    /// <param name="postDatas">
    /// One pair per input: the key is the input type, the value is the ready-to-send
    /// "name=value" term. Terms whose type is "submit" or "button" (any casing) are
    /// kept aside as selectable submit terms; pairs with a null value are ignored.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="postDatas"/> is null.</exception>
    public PostForm(string name,
        string actionUrl,
        List<KeyValuePair<string, string>> postDatas)
    {
        if (postDatas == null)
        {
            throw new ArgumentNullException("postDatas");
        }

        _name = name;
        _actionUrl = actionUrl;
        _inputsList = new List<string>();
        _submitTable = new Dictionary<string, string>();

        foreach (var pair in postDatas)
        {
            string term = pair.Value;
            if (term == null)
            {
                continue;
            }

            if (!IsSubmitType(pair.Key))
            {
                _inputsList.Add(term);
                continue;
            }

            // The lookup key is whatever follows the first '='; a term without '='
            // is filed under the empty key instead of raising IndexOutOfRangeException.
            int separator = term.IndexOf('=');
            string key = separator < 0 ? "" : term.Substring(separator + 1);

            if (_defaultSubmitKey == null)
            {
                _defaultSubmitKey = key;
            }

            // A later term with the same key replaces the earlier one.
            _submitTable[key] = term;
        }
    }

    /// <summary>The name of the form element, or its id when it has no name.</summary>
    public string Name { get { return _name; } }

    /// <summary>The URL the form is posted to; may be replaced before submitting.</summary>
    public string ActionUrl { get { return _actionUrl; } set { _actionUrl = value; } }

    /// <summary>
    /// Posts the form using the first submit term, if the form has one.
    /// </summary>
    /// <param name="client">The client that performs the upload.</param>
    /// <returns>The response body returned by the upload.</returns>
    public string Submit(CookieAwareWebClient client)
    {
        return Submit(client, null);
    }

    /// <summary>
    /// Posts the form, appending the submit term selected by <paramref name="submits"/>.
    /// </summary>
    /// <param name="client">The client that performs the upload.</param>
    /// <param name="submits">
    /// The value of the submit input to send, either exactly as stored or in its
    /// plain form (it is URL-encoded for a second lookup). Pass null to use the
    /// first submit term, or none when the form has no submit input.
    /// </param>
    /// <returns>The response body returned by the upload.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="client"/> is null.</exception>
    /// <exception cref="ArgumentException">No submit term matches <paramref name="submits"/>.</exception>
    public string Submit(CookieAwareWebClient client, string submits)
    {
        if (client == null)
        {
            throw new ArgumentNullException("client");
        }

        string submitString = null;
        if (submits == null)
        {
            if (_defaultSubmitKey != null)
            {
                submitString = _submitTable[_defaultSubmitKey];
            }
        }
        else if (!_submitTable.TryGetValue(submits, out submitString)
            && !_submitTable.TryGetValue(System.Web.HttpUtility.UrlEncode(submits), out submitString))
        {
            // The stored keys are in the encoded form of the terms, so the encoded
            // spelling of the requested value was tried as well before giving up.
            throw new ArgumentException("Could not find submits value:" + submits);
        }

        // Joining one list avoids a stray leading '&' when there are no ordinary inputs.
        List<string> terms = new List<string>(_inputsList);
        if (submitString != null)
        {
            terms.Add(submitString);
        }
        string postData = string.Join("&", terms.ToArray());

#if DEBUG
        // One term per line, for inspection in the debugger only.
        string dump = string.Join("\r\n", terms.ToArray());
#endif

        client.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
        return client.UploadString(_actionUrl, "POST", postData);
    }

    // True for the input types whose terms are sent only when selected as the submit.
    // Ordinal comparison keeps the result independent of the current culture.
    private static bool IsSubmitType(string type)
    {
        return string.Equals(type, "submit", StringComparison.OrdinalIgnoreCase)
            || string.Equals(type, "button", StringComparison.OrdinalIgnoreCase);
    }
}
}
|