C#程式

來源:互聯網
上載者:User

標籤:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace MeiZi
{
    /// <summary>
    /// Console entry point; constructing <see cref="GetMeiziPic"/> runs the whole crawl.
    /// </summary>
    public class Program
    {
        static void Main(string[] args)
        {
            new GetMeiziPic();
        }
    }

    /// <summary>
    /// Crawls a fixed range of listing pages, follows the article links found on each
    /// page and downloads the matching images into an "Images" folder under the
    /// current directory. All of the work is started by the constructor.
    /// </summary>
    public class GetMeiziPic
    {
        /// <summary>Listing URL; the page number is appended to it.</summary>
        private const string ListUrlPrefix = "http://www.sepaidui.com/?sort=4&page=";

        /// <summary>First listing page to fetch (inclusive).</summary>
        private const int FirstPage = 1;

        /// <summary>Upper bound of the listing pages (exclusive), i.e. pages 1..9 are fetched.</summary>
        private const int EndPageExclusive = 10;

        /// <summary>Matches an &lt;img&gt; tag; group 1 captures the value of its src attribute.</summary>
        private const string ImgRegex = @"<img[^>]*?src\s*=\s*[""']?([^'"" >]+?)[ '""][^>]*?>";

        /// <summary>Matches an anchor wrapped directly in an &lt;h2&gt; element.</summary>
        private const string LinkRegex = @"<h2><a\s+[^>]*?>[^<>]*?<\/a></h2>";

        /// <summary>Only image URLs containing this marker are downloaded.</summary>
        private const string ImageHostMarker = "sinaimg";

        // NOTE(review): presumably a banner/placeholder picture repeated on every page — confirm.
        private const string ExcludedImageId = "60d02b59tw1eq6g7srmiwj20pv03mdg8";

        private const RegexOptions HtmlOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

        /// <summary>Loose (unanchored) check that a string contains something shaped like an http URL.</summary>
        private static readonly Regex _isUrlFormat = new Regex(@"http://?([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?");

        /// <summary>Directory the downloaded images are written to.</summary>
        private readonly string _path;

        /// <summary>
        /// Creates the output directory if needed and immediately crawls every listing page.
        /// </summary>
        public GetMeiziPic()
        {
            _path = DealDir(Path.Combine(Environment.CurrentDirectory, "Images"));
            Console.WriteLine("===============    開始採集   ===============");
            for (var i = FirstPage; i < EndPageExclusive; i++)
            {
                Console.WriteLine("===============正在下載第{0}頁資料===============", i);
                DoFetchStep1(i);
            }
            Console.WriteLine("===============   採集完成   ===============");
        }

        /// <summary>Ensures <paramref name="path"/> exists as a directory and returns it.</summary>
        private string DealDir(string path)
        {
            if (!Directory.Exists(path))
            {
                Directory.CreateDirectory(path);
            }
            return path;
        }

        /// <summary>Downloads listing page <paramref name="pageNum"/> and processes the links on it.</summary>
        private void DoFetchStep1(int pageNum)
        {
            var html = DownloadHtml(ListUrlPrefix + pageNum);
            if (html != null)
            {
                FetchLinksFromSource1(html);
            }
        }

        /// <summary>Finds every article link in a listing page and fetches the page it points to.</summary>
        private void FetchLinksFromSource1(string htmlSource)
        {
            foreach (Match m in Regex.Matches(htmlSource, LinkRegex, HtmlOptions))
            {
                // The text between the first pair of double quotes is taken as the link target;
                // this assumes href is the anchor's first quoted attribute.
                var parts = m.Value.Split('"');
                if (parts.Length < 2)
                {
                    // Anchor without any quoted attribute: nothing to follow.
                    continue;
                }
                DoFetchStep2(parts[1]);
            }
        }

        /// <summary>Downloads the article page at <paramref name="href"/> and processes the images on it.</summary>
        private void DoFetchStep2(string href)
        {
            var html = DownloadHtml(href);
            if (html != null)
            {
                FetchLinksFromSource2(html);
            }
        }

        /// <summary>
        /// Extracts every image URL from an article page and saves the wanted ones into
        /// the output directory under a random file name. A failed download is logged
        /// and does not stop the remaining ones.
        /// </summary>
        private void FetchLinksFromSource2(string htmlSource)
        {
            using (var client = new WebClient())
            {
                foreach (Match m in Regex.Matches(htmlSource, ImgRegex, HtmlOptions))
                {
                    var href = m.Groups[1].Value;
                    if (!IsWantedImage(href))
                    {
                        continue;
                    }

                    Console.WriteLine(href);
                    try
                    {
                        var uri = new Uri(href);
                        // Take the extension from the URL path only, so a query string
                        // never ends up inside the local file name.
                        var fileName = Path.GetRandomFileName() + Path.GetExtension(uri.AbsolutePath);
                        client.DownloadFile(uri, Path.Combine(_path, fileName));
                    }
                    catch (Exception ex)
                    {
                        // Best effort: report the failure and move on to the next image.
                        Console.WriteLine(ex.Message);
                    }
                }
            }
        }

        /// <summary>
        /// Only images hosted on the Sina photo album are selected, and the one
        /// explicitly excluded image is skipped.
        /// </summary>
        private bool IsWantedImage(string href)
        {
            return href.Contains(ImageHostMarker)
                && CheckIsUrlFormat(href)
                && !href.Contains(ExcludedImageId);
        }

        /// <summary>Returns true when <paramref name="value"/> contains an http-style URL.</summary>
        private bool CheckIsUrlFormat(string value)
        {
            return _isUrlFormat.IsMatch(value);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the response body as text.
        /// Returns null when the URL is not an absolute http/https URL, the status is
        /// not 200 OK, there is no response stream, or the request fails; failures are
        /// written to the console so one bad link does not abort the crawl.
        /// </summary>
        private static string DownloadHtml(string url)
        {
            Uri uri;
            if (!Uri.TryCreate(url, UriKind.Absolute, out uri)
                || (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps))
            {
                Console.WriteLine("Skipped non-HTTP link: {0}", url);
                return null;
            }

            try
            {
                var request = (HttpWebRequest)WebRequest.Create(uri);
                request.Credentials = CredentialCache.DefaultCredentials;

                // Dispose the response on every path so the connection is always released.
                using (var response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        return null;
                    }

                    var stream = response.GetResponseStream();
                    if (stream == null)
                    {
                        return null;
                    }

                    using (var reader = new StreamReader(stream))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (WebException ex)
            {
                Console.WriteLine(ex.Message);
                return null;
            }
            catch (IOException ex)
            {
                Console.WriteLine(ex.Message);
                return null;
            }
        }
    }
}

 

C#程式

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.