// Host name of the search server. SearchButton_Click resolves it through DNS and
// also sends it as the host header value of the HTTP request.
String server = "www.google.com.hk";
/// <summary>
/// Handles the search button click: sends the keyword from the keyword text box to the
/// search server over a plain TCP socket as an HTTP/1.1 GET request, reads the complete
/// response, parses it with <see cref="Htmlparser"/> and adds one table row per result.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (not used).</param>
private void SearchButton_Click(object sender, EventArgs e)
{
    // The socket below is IPv4-only, so pick an IPv4 address explicitly; the first DNS
    // entry may be an IPv6 address, which an InterNetwork socket cannot connect to.
    var address = Array.Find(
        Dns.GetHostAddresses(server),
        a => a.AddressFamily == AddressFamily.InterNetwork);
    if (address == null)
    {
        throw new SocketException((int)SocketError.HostNotFound);
    }

    var response = new StringBuilder();
    using (var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
    {
        sock.Connect(address, 80);
        try
        {
            var keyword = Uri.EscapeDataString(this.Keywordtextbox.Text);

            // The HTTP method and version tokens are case-sensitive and the request line
            // needs single spaces between its three parts. The keyword is percent-escaped
            // and the host name is ASCII, so ASCII is sufficient for the wire bytes.
            var request = string.Format(
                "GET /search?q={0}&ie=utf-8&oe=utf-8 HTTP/1.1\r\nHost: {1}\r\nConnection: close\r\n\r\n",
                keyword,
                server);
            sock.Send(Encoding.ASCII.GetBytes(request));

            // A stateful decoder keeps multi-byte UTF-8 sequences intact when they are
            // split across two Receive calls.
            var decoder = Encoding.UTF8.GetDecoder();
            var buf = new byte[1024];
            var chars = new char[Encoding.UTF8.GetMaxCharCount(buf.Length)];
            int readSize;
            while ((readSize = sock.Receive(buf)) > 0)
            {
                // Decode only the bytes actually received, never the whole buffer.
                var charCount = decoder.GetChars(buf, 0, readSize, chars, 0);
                response.Append(chars, 0, charCount);
            }
        }
        finally
        {
            // Disconnect throws on a socket that is no longer connected, which would
            // hide the original exception; Dispose (via using) releases it either way.
            if (sock.Connected)
            {
                sock.Disconnect(false);
            }
        }
    }

    // Each result is a string[] whose elements become the cells of one table row.
    foreach (var row in new Htmlparser().Parse(response.ToString()))
    {
        table.Rows.Add(row);
    }
}
/// <summary>
/// HTML parser that extracts search-result links from a raw HTTP response.
/// </summary>
public class Htmlparser
{
    // Captures the target URL embedded in a "/url?q=<target>&sa..." redirect link.
    // Built once instead of on every Parse call.
    private static readonly Regex RedirectUrlPattern = new Regex(@"/url\?q=(.*)\&sa");

    /// <summary>
    /// Runs the parse: strips the HTTP header section from <paramref name="source"/>,
    /// loads the remaining HTML through SgmlReader into an XDocument, and yields the
    /// link found under the h3 child of every element whose class attribute is "g".
    /// </summary>
    /// <param name="source">Raw HTTP response text: header lines followed by the HTML body.</param>
    /// <returns>
    /// One two-element array per matching link: element 0 is the link text and element 1
    /// is the URL captured from the link's href. Empty when nothing can be parsed.
    /// </returns>
    public IEnumerable<string[]> Parse(string source)
    {
        if (string.IsNullOrEmpty(source))
        {
            yield break;
        }

        // Remove the HTTP header part: drop every line before the first one that
        // starts with '<'.
        var bodyLines = source
            .Split(new string[] { "\r\n" }, StringSplitOptions.None)
            .SkipWhile(s => !s.StartsWith("<", StringComparison.Ordinal))
            .ToArray();
        if (bodyLines.Length == 0)
        {
            // No markup at all (e.g. an empty or header-only response).
            yield break;
        }

        // Keep the whole body, not just its first line, so multi-line HTML is parsed.
        // NOTE(review): if the server answers with chunked transfer encoding, the
        // chunk-size lines remain in this text - confirm whether de-chunking is needed.
        var html = string.Join("\r\n", bodyLines);

        // Convert the (possibly malformed) HTML into an XDocument.
        using (var sgmlReader = new SgmlReader { DocType = "HTML", CaseFolding = CaseFolding.ToLower })
        {
            sgmlReader.InputStream = new StringReader(html);
            var xml = XDocument.Load(sgmlReader);

            // Pick out the heading of each result item. Names are folded to lower case
            // by the reader, so the attribute and element names must be lower case here.
            var headings =
                from ele in xml.Elements().Descendants()
                let cls = ele.Attribute("class")
                where cls != null && cls.Value == "g"
                select ele.Element("h3");

            // Extract the link text and the real target URL from each heading.
            foreach (var heading in headings)
            {
                if (heading == null)
                {
                    continue;
                }

                var anchor = heading.Element("a");
                var href = anchor == null ? null : anchor.Attribute("href");
                if (href == null)
                {
                    // Heading without a usable link; skip it instead of throwing.
                    continue;
                }

                var match = RedirectUrlPattern.Match(href.Value);
                if (match.Success)
                {
                    yield return new string[] { anchor.Value, match.Groups[1].Value };
                }
            }
        }
    }
}
// References
// Web page crawling:
// http://www.cnblogs.com/lumnm/archive/2009/12/23/1630435.html
// HTML parsing:
// http://developer.51cto.com/art/200909/149097.htm