c#抓取網站資料

來源:互聯網
上載者:User

標籤:

// Download the quick-view listing page and collect one DataTable row per goods entry.
string url = "http://www.123.com/fast_view?a=1&gameId=25&areaId=0&serverId=0";
string referer = "http://www.123.com/";
string html = GetHTML(url, referer, 10 * 1000);

// Result table: every column is kept as text exactly as extracted from the page.
DataTable dt = new DataTable();
dt.Columns.Add("ServerName", typeof(System.String));
dt.Columns.Add("GoodsName", typeof(System.String));
dt.Columns.Add("Price", typeof(System.String));
dt.Columns.Add("Qty", typeof(System.String));
dt.Columns.Add("Id", typeof(System.String));

// Shared by every pattern below: '.' must span line breaks and tag names may be in any case.
System.Text.RegularExpressions.RegexOptions options =
    System.Text.RegularExpressions.RegexOptions.Singleline |
    System.Text.RegularExpressions.RegexOptions.IgnoreCase;

// Locate the first <table> and the first </table> that follows it.
// If GetHTML failed it returned an "Error:..." string, no table is found and dt stays empty.
const string tableCloseTag = "</table>";
int tableStart = html.IndexOf("<table", StringComparison.OrdinalIgnoreCase);
int tableEnd = tableStart == -1
    ? -1
    : html.IndexOf(tableCloseTag, tableStart, StringComparison.OrdinalIgnoreCase);

if (tableStart != -1 && tableEnd != -1)
{
    // Only rows inside the located table are parsed (the closing tag is included in the slice).
    string tableHtml = html.Substring(tableStart, tableEnd - tableStart + tableCloseTag.Length);

    // The argument of the script calls is quoted. The published listing shows typographic
    // quotes, which are not valid JavaScript string delimiters, so the ASCII apostrophe is
    // accepted as well as the typographic form.
    const string quotedTextPattern = @"SelfTextCut2\(['‘]([^'‘]*)['‘]";
    const string idPattern = @"dl\(['‘](\d+)['‘]\)";

    System.Text.RegularExpressions.MatchCollection rows =
        System.Text.RegularExpressions.Regex.Matches(tableHtml, "<tr[^>]*>(.*?)</tr>", options);

    foreach (System.Text.RegularExpressions.Match row in rows)
    {
        System.Text.RegularExpressions.MatchCollection cells =
            System.Text.RegularExpressions.Regex.Matches(row.Value, "<td[^>]*>(.*?)</td>", options);

        // Rows with fewer than 8 cells (headers, spacers) carry no goods data.
        if (cells.Count < 8)
        {
            continue;
        }

        DataRow dr = dt.NewRow();
        dr["ServerName"] = System.Text.RegularExpressions.Regex.Match(cells[0].Value, quotedTextPattern, options).Groups[1].Value;
        dr["GoodsName"] = System.Text.RegularExpressions.Regex.Match(cells[2].Value, quotedTextPattern, options).Groups[1].Value;
        // The price is taken from the argument of a parseFloat(...) call in the cell.
        dr["Price"] = System.Text.RegularExpressions.Regex.Match(cells[5].Value, @"parseFloat\(([^\)]*)\)", options).Groups[1].Value;
        // The quantity is the cell content with all markup tags stripped.
        dr["Qty"] = System.Text.RegularExpressions.Regex.Replace(cells[6].Value, "<[^>]*>", "", options);
        dr["Id"] = System.Text.RegularExpressions.Regex.Match(cells[7].Value, idPattern, options).Groups[1].Value;
        dt.Rows.Add(dr);
    }
}

// ================================================================================
// A small GetHTML helper, used by the snippet above.
// ================================================================================

/// <summary>
/// Downloads a page with an HTTP GET request and returns its body decoded as gb2312 text.
/// </summary>
/// <param name="strUrl">Address of the page to download.</param>
/// <param name="Reffer">Value sent in the Referer request header.</param>
/// <param name="Timeout">Request timeout in milliseconds.</param>
/// <returns>
/// The response body, or the string "Error:" followed by the exception text when the
/// request fails. This method does not throw; callers must check for that prefix.
/// </returns>
public static string GetHTML(string strUrl, string Reffer, int Timeout)
{
    try
    {
        // Build the request.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
        request.Method = "GET";
        request.ServicePoint.Expect100Continue = false;

        // Request headers.
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36";
        request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        request.Headers.Add("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
        // Let the framework send Accept-Encoding and transparently decode both gzip and
        // deflate bodies, instead of advertising deflate and then only unpacking gzip.
        request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        request.KeepAlive = false;
        request.Referer = Reffer;
        request.Timeout = Timeout;

        // The using blocks release the connection and streams even if reading fails midway.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312")))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception err)
    {
        // Deliberate best-effort contract: report the failure as text rather than throwing.
        return "Error:" + err.ToString();
    }
}

 

c#抓取網站資料

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.