ASP.NET tutorial: a train ticket information crawling system
First, the system's features:
1. Each run crawls the newest ticket listings from the network;
2. Supports filtering ticket listings by keyword;
3. Supports a configurable crawl interval;
4. Supports links to the ticket web sites;
Second, the operating environment:
1. .NET Framework 2.0 or later;
2. Internet Explorer 6.0 or later;
Third, the implementation approach:
1. Configure the addresses to crawl and how each page is parsed
/// <summary>
/// Builds the built-in list of ticket sites to crawl.
/// </summary>
/// <remarks>
/// Each entry carries the page URL, the regular expression used to pull
/// listings out of the page, the page's text encoding and, where set, a
/// domain prefix, a keyword list and a flag marking that the pattern's
/// capture groups are in (content, url) order instead of (url, content).
///
/// NOTE(review): the Site type is declared elsewhere; the member names used
/// here (Name, Url, RegexPattern, Encoding, Domain, Keys, IsChange) are
/// assumed PascalCase spellings - confirm against the real declaration.
///
/// NOTE(review): the regex patterns below were reconstructed from a garbled
/// copy (injected spaces, unescaped quotes, and mce_href / mce_style
/// attributes that look like HTML-editor residue, plus possibly altered
/// letter case). Verify every pattern against the live page markup.
/// </remarks>
/// <returns>A new list holding one Site per supported web site.</returns>
public static List<Site> Getdefaultsites()
{
    List<Site> sites = new List<Site>();

    sites.Add(new Site()
    {
        Name = "Train Ticket Net",
        Url = "Http://www.huochepiao.com/city/search.asp?leixing=%d7%aa%c8%c3&chufa=&daoda=",
        // Group 1 = link target, group 2 = link text.
        RegexPattern = @"·<a href=""(.*?)"" target=_blank>(.*?)</a>",
        Encoding = Encoding.Default,
        Keys = new string[] { "Lie" }
    });

    sites.Add(new Site()
    {
        Name = "Bai",
        // The stray blanks that were embedded in this query string have been
        // removed; a blank inside a parameter name changes the request.
        Url = "http://beijing.baixing.com/huochepiao/?%e5%8f%91%e8%bd%a6%e6%97%a5%e6%9c%9f=&%e8%bd%a6%e6%ac%a1=&%e5%87%ba%e5%8f%91%e5%9f%8e%e5%b8%82=%e5%8c%97%e4%ba%ac&%e5%88%b0%e8%be%be%e5%9f%8e%e5%b8%82=&wanted=1",
        // Group 1 = site-relative link, group 2 = link text.
        RegexPattern = @"""><a href=""/(.*?)"" >(.*?)</a></td>",
        Encoding = Encoding.UTF8,
        Domain = "http://beijing.baixing.com/",
        Keys = new string[] { "Lie" }
    });

    sites.Add(new Site()
    {
        Name = "NET",
        Url = "http://bj.ganji.com/piao/",
        // Group 1 = site-relative link, group 2 = link text.
        RegexPattern = @"<dt><a href=""/(.*?)"" target=""_blank"">(.*?)</a></dt>",
        Encoding = Encoding.UTF8,
        Domain = "http://bj.ganji.com/",
        Keys = new string[] { "Lie" }
    });

    sites.Add(new Site()
    {
        Name = "Cool News Network",
        Url = "Http://huoche.kuxun.cn/zhuanrang-beijing-wuhan.html",
        // Here the text comes first: group 1 = content, group 2 = link.
        RegexPattern = @"<div class=""col_11 left"">(.*?)<br/><div style=""padding:8px 0 0 0px;""><a target='_blank' href='(.*?)'>",
        Encoding = Encoding.UTF8,
        Domain = "",
        // Any non-empty value marks the swapped (content, url) group order.
        IsChange = "Yes"
    });

    return sites;
}
Fetching the web page content
/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes the raw
/// bytes with the supplied encoding.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="Codetpye">Encoding used to turn the downloaded bytes into text.</param>
/// <returns>
/// The decoded page text, or an empty string when the download or the
/// decoding fails for any reason.
/// </returns>
public string getnetstring(string url, Encoding Codetpye)
{
    string str = "";
    try
    {
        // WebClient is disposable; release it as soon as the download ends.
        using (WebClient client = new WebClient())
        {
            byte[] pagedata = client.DownloadData(url);
            str = Codetpye.GetString(pagedata);
        }
    }
    catch (Exception ex)
    {
        // Best effort by design: callers get "" on failure. Record the
        // reason instead of discarding it silently.
        System.Diagnostics.Trace.TraceWarning("getnetstring failed for {0}: {1}", url, ex.Message);
    }
    return str;
}
Parsing the ticket information
/// <summary>
/// Extracts ticket listings from downloaded page text and remembers which
/// listing URLs have already been returned.
/// </summary>
/// <remarks>
/// The "already seen" list is static, so it is shared by every instance of
/// this class. No synchronization is performed on it here.
///
/// NOTE(review): Site and GetResult are declared elsewhere; the member names
/// used here (Site.RegexPattern / IsChange / Domain / Name and
/// GetResult.Url / Content / GetDateTime / Name) are assumed PascalCase
/// spellings - confirm against the real declarations.
/// </remarks>
public class Clsnetinfoparseserver
{
    // Every result handed out so far; used to suppress duplicates by URL.
    private static IList<GetResult> lslist = new List<GetResult>();

    /// <summary>
    /// Forgets all previously returned results so they can be reported again.
    /// </summary>
    public void Clearls()
    {
        lslist = new List<GetResult>();
    }

    /// <summary>
    /// Tells whether a result with the given URL has already been returned.
    /// </summary>
    private bool Ishas(string url)
    {
        foreach (var item in lslist)
        {
            if (item.Url == url)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Tells whether a listing passes the keyword filter: true when no
    /// keywords are given, otherwise true when the content contains any one.
    /// </summary>
    private static bool ContainsAnyKey(string content, string[] keys)
    {
        if (keys == null || keys.Length == 0)
        {
            return true;
        }
        foreach (string key in keys)
        {
            if (content.Contains(key))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Applies the site's regular expression to the page text and returns the
    /// listings that are new and pass the keyword filter.
    /// </summary>
    /// <param name="strnetinfo">Downloaded page text; null or empty yields an empty list.</param>
    /// <param name="site">Site description supplying the pattern, domain prefix, name and group-order flag.</param>
    /// <param name="keys">Keywords to filter by; null or empty disables filtering.</param>
    /// <returns>The newly found results, in match order. Each is also added to the shared "seen" list.</returns>
    public IList<GetResult> Donetinfoparse(string strnetinfo, Site site, string[] keys)
    {
        IList<GetResult> list = new List<GetResult>();

        // Regex.Matches throws on null input; a failed download has nothing to parse.
        if (string.IsNullOrEmpty(strnetinfo))
        {
            return list;
        }

        MatchCollection mc = Regex.Matches(strnetinfo, site.RegexPattern);
        foreach (Match m in mc)
        {
            GetResult r = new GetResult();
            if (!string.IsNullOrEmpty(site.IsChange))
            {
                // Swapped group order for this site: group 1 is the text, group 2 the link.
                r.Content = site.Domain + m.Groups[1].Value.Trim();
                r.Url = m.Groups[2].Value.Trim();
            }
            else
            {
                // Default group order: group 1 is the link, group 2 the text.
                r.Url = site.Domain + m.Groups[1].Value.Trim();
                r.Content = m.Groups[2].Value.Trim();
            }

            // Skip listings already reported, then those failing the keyword
            // filter. Filtered-out listings are not remembered as seen.
            if (Ishas(r.Url) || !ContainsAnyKey(r.Content, keys))
            {
                continue;
            }

            r.GetDateTime = DateTime.Now.ToString();
            r.Name = site.Name;
            lslist.Add(r);
            list.Add(r);
        }
        return list;
    }
}