using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace Regular
{
    /// <summary>
    /// Console crawler: downloads a listing page, follows every link whose href contains
    /// "article", extracts the page title and the <c>&lt;div class="content"&gt;</c> block with
    /// regular expressions, and writes them to numbered text files in a "data" directory
    /// under the current working directory.
    /// </summary>
    internal static class Program
    {
        /// <summary>Listing page whose links are crawled.</summary>
        private const string ListingUrl = "http://www.admin5.com/browse/177/";

        // The patterns never change, so they are built once instead of once per article.

        /// <summary>Matches the value of each <c>href="..."</c> attribute (quotes excluded).</summary>
        private static readonly Regex HrefPattern = new Regex("(?<=href=\").*?(?=\")");

        /// <summary>Matches the text between <c>title&gt;</c> and <c>&lt;/title&gt;</c>.</summary>
        private static readonly Regex TitlePattern = new Regex("(?<=title>).*?(?=</title>)");

        /// <summary>Matches the first <c>&lt;div class="content"&gt;</c> element up to its nearest <c>&lt;/div&gt;</c>.</summary>
        private static readonly Regex ContentPattern = new Regex("<div class=\"content\">[\\s\\S]*?</div>");

        /// <summary>
        /// Entry point. Crawls <see cref="ListingUrl"/>, saves every article that could be
        /// downloaded and parsed, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            var listingUri = new Uri(ListingUrl);
            string html = GetHtml(ListingUrl, Encoding.UTF8);
            string dataDirectory = Path.Combine(Directory.GetCurrentDirectory(), "data");

            int fileNumber = 1;
            foreach (Match link in HrefPattern.Matches(html))
            {
                if (link.Value.IndexOf("article", StringComparison.Ordinal) < 0)
                {
                    continue;
                }

                // Hrefs may be relative; resolve them against the listing page so the
                // request always receives an absolute http(s) URL.
                Uri articleUri;
                if (!TryResolveHttpUri(listingUri, link.Value, out articleUri))
                {
                    continue;
                }

                Console.WriteLine(articleUri.AbsoluteUri);
                Console.WriteLine("Crawl Content");

                string content;
                try
                {
                    content = GetHtml(articleUri.AbsoluteUri, Encoding.UTF8);
                }
                catch (WebException ex)
                {
                    // One unreachable article should not abort the whole crawl.
                    Console.Error.WriteLine("Download failed for " + articleUri.AbsoluteUri + ": " + ex.Message);
                    continue;
                }

                Match title = TitlePattern.Match(content);
                Match body = ContentPattern.Match(content);
                if (!title.Success || !body.Success)
                {
                    Console.Error.WriteLine("Title or content block not found in " + articleUri.AbsoluteUri);
                    continue;
                }

                Console.WriteLine("Save Data");
                SaveArticle(dataDirectory, fileNumber, title.Value, body.Value);
                fileNumber++;
                Console.WriteLine("Saved successfully");
            }

            Console.WriteLine("OK");
            Console.ReadKey();
        }

        /// <summary>
        /// Resolves <paramref name="href"/> against <paramref name="baseUri"/> and accepts the
        /// result only when it is an http or https URL.
        /// </summary>
        /// <param name="baseUri">Absolute URI of the page the link was found on.</param>
        /// <param name="href">Raw href value, absolute or relative.</param>
        /// <param name="resolved">The absolute URI when the method returns <c>true</c>.</param>
        /// <returns><c>true</c> if the href resolved to an http(s) URI; otherwise <c>false</c>.</returns>
        private static bool TryResolveHttpUri(Uri baseUri, string href, out Uri resolved)
        {
            if (!Uri.TryCreate(baseUri, href, out resolved))
            {
                return false;
            }

            return resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps;
        }

        /// <summary>
        /// Writes one article to <c>&lt;directory&gt;/&lt;number&gt;.txt</c> as the title, a CRLF,
        /// and the content block. The directory is created if it does not exist.
        /// </summary>
        /// <param name="directory">Target directory.</param>
        /// <param name="number">Sequence number used as the file name.</param>
        /// <param name="title">Extracted page title.</param>
        /// <param name="body">Extracted content markup.</param>
        private static void SaveArticle(string directory, int number, string title, string body)
        {
            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(directory);
            File.WriteAllText(Path.Combine(directory, number + ".txt"), title + "\r\n" + body);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the response body as text.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="encoding">Encoding used to decode the response stream.</param>
        /// <returns>The full response body.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        private static string GetHtml(string url, Encoding encoding)
        {
            var request = (HttpWebRequest)WebRequest.Create(url);

            // Dispose the response, stream and reader so the connection is released
            // even when reading throws.
            using (var response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (var reader = new StreamReader(stream, encoding))
            {
                return reader.ReadToEnd();
            }
        }
    }
}
A simple C# crawler: summary