Example: automatically obtaining proxy IP information (shared code and sample IP addresses included)
/// <summary>
/// Reads the content behind a URL with an HTTP GET request.
/// </summary>
/// <param name="url">URL to download.</param>
/// <returns>The website's text content.</returns>
public string HttpGetText(string url)
{
    HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
    request.Method = "GET";
    request.ContentType = @"application/x-www-form-urlencoded";
    request.Accept = @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    // Only the value belongs here; the header name is supplied by the enum member.
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, @"zh-CN,zh;q=0.8");
    request.UserAgent = @"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0";
    // `proxy` is a member declared outside this listing.
    request.Proxy = proxy;

    string json;
    // Dispose the response, its stream and the reader even when reading throws.
    using (var response = request.GetResponse())
    using (var stream = response.GetResponseStream())
    using (var read = new StreamReader(stream))
    {
        json = read.ReadToEnd();
    }

    // Dump the downloaded text between two separator lines for debugging.
    System.Diagnostics.Debug.WriteLine("".PadRight(20, '='));
    System.Diagnostics.Debug.WriteLine(json);
    System.Diagnostics.Debug.WriteLine("".PadRight(20, '='));
    return json;
}

/// <summary>
/// Container for the proxy entries extracted from the listing page.
/// </summary>
public class IPs
{
    /// <summary>Proxy entries collected so far.</summary>
    public List<proxy> items = new List<proxy>();

    /// <summary>
    /// One row of the proxy table.
    /// </summary>
    public class proxy
    {
        public string ip;
        public int port;
        public string address;
        public int speed;
        public int life; // the number of consecutive minutes
        public DateTime check_time;
    }
}

/// <summary>
/// Downloads the proxy listing page, extracts every table row with a regular
/// expression, and adds each row to <c>listView1</c> and to <c>IPaddress.items</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
// NOTE(review): the handler name is garbled in the listing ("button#click");
// button1_Click is assumed here - confirm against the designer wiring.
private void button1_Click(object sender, EventArgs e)
{
    var html = HttpGetText("http://www.xicidaili.com/nt");

    const string tableClose = "</table>";
    int i1 = html.IndexOf("<table id=\"ip_list\">", StringComparison.Ordinal);
    // Look for the closing tag only after the opening tag, so that an earlier
    // table on the page cannot produce a negative length.
    int i2 = i1 < 0 ? -1 : html.IndexOf(tableClose, i1, StringComparison.Ordinal);
    if (i1 < 0 || i2 < 0)
    {
        LogAdd("The proxy list table was not found in the downloaded page");
        return;
    }

    string ip_list = html.Substring(i1, i2 - i1 + tableClose.Length);

    // One match per table row; the named groups capture the cells of interest.
    var find = new Regex(@"<tr.*?>\s*?<td.*?>.*?</td>\s*?<td.*?>(?<ip>.*?)</td>\s*?<td.*?>(?<port>.*?)</td>\s*?<td.*?>\s*?<a.*?>(?<address>.*?)</a>\s*?</td>.*?width:(?<speed>.*?)%.*?<td>(?<life>.*?)</td>.*?<td>(?<check_time>.*?)</td>.*?</tr>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    var ips = find.Matches(ip_list);

    listView1.BeginUpdate();
    try
    {
        foreach (System.Text.RegularExpressions.Match item in ips)
        {
            try
            {
                // Convert every field first: a row that fails conversion is
                // logged and skipped without touching the list view.
                var ip = new IPs.proxy();
                ip.ip = item.Groups["ip"].Value;
                ip.port = Convert.ToInt32(item.Groups["port"].Value);
                ip.address = item.Groups["address"].Value;
                ip.speed = Convert.ToInt32(item.Groups["speed"].Value);
                ip.life = conv(item.Groups["life"].Value);
                ip.check_time = Convert.ToDateTime(item.Groups["check_time"].Value);

                ListViewItem lvi = new ListViewItem(item.Groups["ip"].Value);
                lvi.SubItems.Add(item.Groups["port"].Value);
                lvi.SubItems.Add(item.Groups["address"].Value);
                lvi.SubItems.Add(item.Groups["speed"].Value);
                lvi.SubItems.Add(item.Groups["life"].Value);
                lvi.SubItems.Add(item.Groups["check_time"].Value);

                listView1.Items.Add(lvi);
                IPaddress.items.Add(ip);
            }
            catch
            {
                LogAdd("An error occurred while converting ip address information" + item.Value);
            }
        }
    }
    finally
    {
        // Always re-enable drawing, even if something unexpected escapes the loop.
        listView1.EndUpdate();
    }

    // Converts a lifetime cell such as "<number><unit>" into minutes.
    // NOTE(review): the unit literals below look like translations of the
    // site's original unit words - confirm against the live page.
    int conv(string life)
    {
        int a = 1;
        if (life.Contains("day"))
        {
            a = 60 * 24;
            life = life.Replace("day", "");
        }
        else if (life.Contains("Minute"))
        {
            a = 1;
            // Strip the plural form first so no stray "s" is left behind.
            life = life.Replace("Minutes", "").Replace("Minute", "");
        }
        else if (life.Contains("Hour"))
        {
            a = 60;
            life = life.Replace("Hour", "");
        }

        return Convert.ToInt32(life) * a;
    }
}
The key code fetches the proxy IP information from the specified web page and then extracts it with a regular expression.
Originally, I wanted to convert the HTML directly to XML, but the page's HTML is not well-formed, so the conversion fails.
That left searching with regular expressions as the only option. The results are good.
Code running environment: vs2017
Of course, an older version can also be used if the local function is moved outside the method.
:
Key code:
// Download the proxy listing page.
string html = HttpGetText("http://www.xicidaili.com/nt");

// Cut out everything from the opening ip_list table tag through the first "</table>".
string closingTag = "</table>";
int tableStart = html.IndexOf("<table id=\"ip_list\">");
int tableStop = html.IndexOf(closingTag) + closingTag.Length;
string tableHtml = html.Substring(tableStart, tableStop - tableStart);

// One match per table row; the named groups capture the ip, port, address,
// speed, life and check_time cells.
RegexOptions rowOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;
Regex rowPattern = new Regex(@"<tr.*?>\s*?<td.*?>.*?</td>\s*?<td.*?>(?<ip>.*?)</td>\s*?<td.*?>(?<port>.*?)</td>\s*?<td.*?>\s*?<a.*?>(?<address>.*?)</a>\s*?</td>.*?width:(?<speed>.*?)%.*?<td>(?<life>.*?)</td>.*?<td>(?<check_time>.*?)</td>.*?</tr>", rowOptions);
MatchCollection ips = rowPattern.Matches(tableHtml);
A recommended tutorial and reference for learning regular expressions:
http://deerchao.net/tutorials/regex/regex.htm#charclass
I can't remember regular expression syntax myself, so I look it up whenever I need it.