C# 採集網頁用的幾個函數(有解釋)

來源:互聯網
上載者:User

/// <summary>
/// Downloads the resource at <paramref name="Url"/> and returns its body decoded as text.
/// </summary>
/// <param name="Url">Address of the page to download.</param>
/// <param name="charset">
/// Name of the text encoding used to decode the response body.
/// When null or empty, "gb2312" is used.
/// </param>
/// <returns>
/// The decoded response body, or an empty string when anything fails
/// (malformed URL, network error, unknown encoding name, ...).
/// </returns>
public  string GetHtmlSource(string Url, string charset)
        {
            if (string.IsNullOrEmpty(charset))
            {
                charset = "gb2312";
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

                // The using blocks release the connection even when reading or
                // decoding throws part-way through.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding(charset)))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best-effort contract: callers always get a string back and never
                // see an exception. The failure is traced instead of silently dropped.
                System.Diagnostics.Trace.TraceWarning(
                    "GetHtmlSource failed for '{0}': {1}", Url, ex.Message);
                return "";
            }
        }

 

/// <summary>
/// Extracts the text located between a start marker and an end marker in a piece of HTML.
/// When the markers occur several times, the text of the last occurrence is returned.
/// </summary>
/// <param name="code">HTML source to search. Null or empty input yields an empty string.</param>
/// <param name="wordsBegin">
/// Start marker. It is spliced into the regular expression unescaped, so regex
/// metacharacters in it act as pattern syntax; use Regex.Escape for literal text.
/// </param>
/// <param name="wordsEnd">End marker; treated the same way as <paramref name="wordsBegin"/>.</param>
/// <returns>
/// The shortest non-empty run of characters (line breaks included) between the markers,
/// matched case-insensitively, or an empty string when nothing matches.
/// </returns>
public string SniffwebCode(string code, string wordsBegin, string wordsEnd)
        {
            if (string.IsNullOrEmpty(code))
            {
                return "";
            }

            // RegexOptions.Compiled is intentionally omitted: the pattern is rebuilt on
            // every call, so compiling it would only add cost without any reuse.
            Regex pattern = new Regex(
                wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd,
                RegexOptions.IgnoreCase);

            // Walk every match and keep overwriting, so the last match wins.
            string lastCapture = "";
            foreach (Match hit in pattern.Matches(code))
            {
                lastCapture = hit.Groups["title"].Value;
            }
            return lastCapture;
        }

/// <summary>
/// Collects every piece of text located between a start marker and an end marker in a piece of HTML.
/// </summary>
/// <param name="code">HTML source to search. Null or empty input yields an empty list.</param>
/// <param name="wordsBegin">
/// Start marker. It is spliced into the regular expression unescaped, so regex
/// metacharacters in it act as pattern syntax; use Regex.Escape for literal text.
/// </param>
/// <param name="wordsEnd">End marker; treated the same way as <paramref name="wordsBegin"/>.</param>
/// <returns>
/// An ArrayList of strings, one per match in document order. Each entry is the shortest
/// non-empty run of characters (line breaks included) between the markers, matched
/// case-insensitively. The list is empty when nothing matches.
/// </returns>
public ArrayList SniffwebCodeReturnList(string code, string wordsBegin, string wordsEnd)
        {
            ArrayList captures = new ArrayList();
            if (string.IsNullOrEmpty(code))
            {
                return captures;
            }

            // RegexOptions.Compiled is intentionally omitted: the pattern is rebuilt on
            // every call, so compiling it would only add cost without any reuse.
            Regex pattern = new Regex(
                wordsBegin + @"(?<title>[\s\S]+?)" + wordsEnd,
                RegexOptions.IgnoreCase);

            foreach (Match hit in pattern.Matches(code))
            {
                captures.Add(hit.Groups["title"].Value);
            }
            return captures;
        }

 

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.