標籤:
/// <summary>
/// Small convenience wrappers around HtmlAgilityPack for parsing HTML and
/// filtering nodes by attribute value.
/// </summary>
class h
{
    /// <summary>
    /// Reads an attribute from a node.
    /// </summary>
    /// <param name="node">Node to read from.</param>
    /// <param name="proname">Attribute name.</param>
    /// <returns>The attribute value, or an empty string when the attribute is absent.</returns>
    public static string getPro(HtmlNode node, string proname)
    {
        return node.GetAttributeValue(proname, "");
    }

    /// <summary>
    /// Parses an HTML string and selects nodes with an XPath expression.
    /// </summary>
    /// <param name="htm">HTML markup to load.</param>
    /// <param name="xpath">XPath expression evaluated against the document root.</param>
    /// <returns>
    /// Whatever <c>SelectNodes</c> returns. HtmlAgilityPack returns <c>null</c>
    /// (not an empty collection) when nothing matches, so callers must be
    /// prepared for <c>null</c>; <see cref="Contains"/> and
    /// <see cref="Equals(HtmlNodeCollection, string, string)"/> accept it.
    /// </returns>
    public static HtmlNodeCollection Parse(string htm, string xpath)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(htm);
        return doc.DocumentNode.SelectNodes(xpath);
    }

    /// <summary>
    /// Returns the nodes whose attribute <paramref name="key"/> contains
    /// <paramref name="value"/> as a substring.
    /// </summary>
    /// <param name="collection">Nodes to filter; <c>null</c> is treated as empty.</param>
    /// <param name="key">Attribute name to inspect (missing attributes read as "").</param>
    /// <param name="value">Substring to look for.</param>
    /// <returns>Matching nodes in their original order; never <c>null</c>.</returns>
    public static HtmlNode[] Contains(HtmlNodeCollection collection, string key, string value)
    {
        return Filter(collection, key, value, false);
    }

    /// <summary>
    /// Returns the nodes whose attribute <paramref name="key"/> is exactly equal
    /// to <paramref name="value"/>.
    /// </summary>
    /// <param name="collection">Nodes to filter; <c>null</c> is treated as empty.</param>
    /// <param name="key">Attribute name to inspect (missing attributes read as "").</param>
    /// <param name="value">Value the attribute must equal.</param>
    /// <returns>Matching nodes in their original order; never <c>null</c>.</returns>
    public static HtmlNode[] Equals(HtmlNodeCollection collection, string key, string value)
    {
        return Filter(collection, key, value, true);
    }

    /// <summary>
    /// Shared filter loop for Contains / Equals.
    /// </summary>
    private static HtmlNode[] Filter(HtmlNodeCollection collection, string key, string value, bool exactMatch)
    {
        // Parse() hands back null when the XPath matched nothing; treat that
        // as "no nodes" instead of failing inside the foreach.
        if (collection == null)
        {
            return new HtmlNode[0];
        }

        var matches = new List<HtmlNode>();
        foreach (var node in collection)
        {
            var attribute = node.GetAttributeValue(key, "");
            bool isMatch = exactMatch ? attribute == value : attribute.Contains(value);
            if (isMatch)
            {
                matches.Add(node);
            }
        }
        return matches.ToArray();
    }
}
/// <summary>
/// Regex-based text helpers: extracting text between two delimiters and
/// stripping HTML markup from a string.
/// </summary>
public class u
{
    /// <summary>
    /// Entity patterns and their replacement text, applied in this order by
    /// <see cref="noHtml"/>. Order matters: "amp" is decoded before "lt"/"gt",
    /// so a doubly-encoded entity is decoded twice.
    /// </summary>
    private static readonly string[,] EntityReplacements =
    {
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\xa1" },
        { @"&(cent|#162);", "\xa2" },
        { @"&(pound|#163);", "\xa3" },
        { @"&(copy|#169);", "\xa9" },
    };

    /// <summary>
    /// Builds the regex shared by getMidValue and getvalue: a lazy match of any
    /// characters preceded by <paramref name="begin"/> and followed by
    /// <paramref name="end"/>. Both delimiters are inserted into the pattern
    /// verbatim, i.e. they are interpreted as regular expressions and are not
    /// escaped.
    /// </summary>
    private static Regex BuildBetweenRegex(string begin, string end)
    {
        return new Regex(
            "(?<=(" + begin + "))[.\\s\\S]*?(?=(" + end + "))",
            RegexOptions.Multiline | RegexOptions.Singleline);
    }

    /// <summary>
    /// Finds every shortest span of text lying between a begin and an end delimiter.
    /// </summary>
    /// <param name="begin">Regex pattern for the leading delimiter (not escaped).</param>
    /// <param name="end">Regex pattern for the trailing delimiter (not escaped).</param>
    /// <param name="html">Text to search.</param>
    /// <returns>All matches; the delimiters themselves are not part of the match values.</returns>
    public static MatchCollection getMidValue(string begin, string end, string html)
    {
        return BuildBetweenRegex(begin, end).Matches(html);
    }

    /// <summary>
    /// Returns the first shortest span of text lying between a start and an end delimiter.
    /// </summary>
    /// <param name="str">Text to search.</param>
    /// <param name="start">Regex pattern for the leading delimiter (not escaped).</param>
    /// <param name="end">Regex pattern for the trailing delimiter (not escaped).</param>
    /// <returns>The matched text, or an empty string when there is no match.</returns>
    public static string getvalue(string str, string start, string end)
    {
        return BuildBetweenRegex(start, end).Match(str).Value;
    }

    /// <summary>
    /// Strips HTML markup from a string: removes script blocks and tags,
    /// decodes a fixed set of common entities, drops any other numeric
    /// entity, and finally removes every remaining angle bracket and
    /// carriage return.
    /// </summary>
    /// <param name="Htmlstring">HTML text to clean.</param>
    /// <returns>The text with markup removed.</returns>
    public static string noHtml(string Htmlstring)
    {
        // Remove script blocks. Singleline lets '.' span line breaks so that
        // multi-line script bodies are removed together with their tags
        // rather than being left behind as plain text.
        Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Remove tags, line breaks followed by whitespace, and comment remnants.
        Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"([\r])[\s]+", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

        // Decode the known named/numeric entities.
        for (int i = 0; i < EntityReplacements.GetLength(0); i++)
        {
            Htmlstring = Regex.Replace(Htmlstring, EntityReplacements[i, 0], EntityReplacements[i, 1], RegexOptions.IgnoreCase);
        }

        // Any other numeric entity is dropped.
        Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

        // string.Replace returns a new string; the result must be assigned or
        // the call has no effect. This final pass is the safety net that keeps
        // unbalanced brackets and brackets produced by entity decoding (for
        // example an encoded script tag) out of the result.
        Htmlstring = Htmlstring.Replace("<", "");
        Htmlstring = Htmlstring.Replace(">", "");
        Htmlstring = Htmlstring.Replace("\r", "");
        return Htmlstring;
    }
}
/// <summary>
/// Blocking HTTP helpers that download a page as a string via GET or POST.
/// Every failure is reported as an <see cref="Exception"/> whose message names
/// the phase that failed and whose InnerException carries the original error.
/// </summary>
public class w
{
    /// <summary>User-Agent header sent with GET requests.</summary>
    private const string DefaultUserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; Maxthon 2.0)";

    /// <summary>
    /// Downloads a page with GET and decodes it as UTF-8.
    /// </summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body.</returns>
    public static string get(string urlString)
    {
        return getutf8(urlString);
    }

    /// <summary>
    /// Downloads a page with GET and decodes it as UTF-8.
    /// </summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body.</returns>
    public static string getutf8(string urlString)
    {
        return get(urlString, Encoding.UTF8);
    }

    /// <summary>
    /// Downloads a page with GET and decodes it with the "gbk" encoding.
    /// </summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body.</returns>
    public static string getgbk(string urlString)
    {
        return get(urlString, Encoding.GetEncoding("gbk"));
    }

    /// <summary>
    /// Downloads a page with GET and decodes it with the "gb2312" encoding.
    /// </summary>
    /// <param name="urlString">URL to request.</param>
    /// <returns>The response body.</returns>
    public static string getgb2312(string urlString)
    {
        return get(urlString, Encoding.GetEncoding("gb2312"));
    }

    /// <summary>
    /// Downloads a page with GET and decodes it with the given encoding.
    /// </summary>
    /// <param name="urlString">URL to request; must be an HTTP(S) URL.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>The response body.</returns>
    /// <exception cref="Exception">
    /// The request could not be created, the server response could not be
    /// obtained, or the body could not be read.
    /// </exception>
    public static string get(string urlString, Encoding encoding)
    {
        HttpWebRequest request = CreateRequest(urlString);
        request.UserAgent = DefaultUserAgent;

        HttpWebResponse response = null;
        Stream stream = null;
        try
        {
            // Obtain the server response.
            try
            {
                response = (HttpWebResponse)request.GetResponse();
                stream = response.GetResponseStream();
            }
            catch (Exception ex)
            {
                throw new Exception("接受伺服器返回頁面時發生錯誤!", ex);
            }

            // Read the whole body.
            using (StreamReader reader = new StreamReader(stream, encoding))
            {
                try
                {
                    return reader.ReadToEnd();
                }
                catch (Exception ex)
                {
                    throw new Exception("讀取頁面資料時發生錯誤!", ex);
                }
            }
        }
        finally
        {
            // Release the connection on every path, including failures
            // part-way through; Close is safe to call more than once.
            if (stream != null)
            {
                stream.Close();
            }
            if (response != null)
            {
                response.Close();
            }
        }
    }

    /// <summary>
    /// Downloads a page by sending form data with POST.
    /// </summary>
    /// <param name="urlString">URL to request; must be an HTTP(S) URL.</param>
    /// <param name="encoding">Encoding used for both the POST data and the response body.</param>
    /// <param name="postDataString">POST data, sent as application/x-www-form-urlencoded.</param>
    /// <returns>The response body.</returns>
    /// <exception cref="Exception">
    /// The request could not be created, the POST data could not be sent, or
    /// the server response could not be obtained or read.
    /// </exception>
    public static string post(string urlString, Encoding encoding, string postDataString)
    {
        // Convert the POST data to bytes.
        byte[] postData = encoding.GetBytes(postDataString);

        HttpWebRequest request = CreateRequest(urlString);

        // Configure the request. The cookie container lives only for this
        // call; it is not exposed to the caller, so nothing is copied out of
        // the response afterwards.
        request.Method = "POST";
        request.KeepAlive = false;
        request.ContentType = "application/x-www-form-urlencoded";
        request.CookieContainer = new CookieContainer();
        request.ContentLength = postData.Length;

        // Send the data to the server. The using block only disposes a stream
        // that was actually obtained, so a failure in GetRequestStream is
        // reported as itself rather than being masked by a null dereference.
        try
        {
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(postData, 0, postData.Length);
            }
        }
        catch (Exception ex)
        {
            throw new Exception("發送POST資料時發生錯誤!", ex);
        }

        // Receive the server response.
        try
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, encoding))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            throw new Exception("接受伺服器返回頁面時發生錯誤!", ex);
        }
    }

    /// <summary>
    /// Creates the HTTP request for a URL, wrapping any failure in the
    /// request-creation error.
    /// </summary>
    private static HttpWebRequest CreateRequest(string urlString)
    {
        try
        {
            HttpWebRequest request = WebRequest.Create(urlString) as HttpWebRequest;
            if (request == null)
            {
                // WebRequest.Create also serves non-HTTP schemes (e.g. file:),
                // for which the cast yields null.
                throw new NotSupportedException("Only HTTP and HTTPS URLs are supported: " + urlString);
            }
            return request;
        }
        catch (Exception ex)
        {
            throw new Exception("建立頁面請求時發生錯誤!", ex);
        }
    }
}
C# 網路操作類