If you would rather use a ready-made wrapper for the methods in this article, my helper class is completely free and open source: C# HttpHelper, a helper class for real HttpWebRequest requests that lets you ignore encoding, certificates, and cookies when crawling web pages.
1. The first technique: obtain web page information from a URL address.
First look at the code
Get method
Copy code
/// <summary>
/// Requests <paramref name="Url"/> with the default method of the created
/// <see cref="System.Net.WebRequest"/> and returns the response body as text.
/// </summary>
/// <param name="Url">Address to request.</param>
/// <param name="type">Name of the character encoding used to decode the response (passed to <c>Encoding.GetEncoding</c>).</param>
/// <returns>The decoded response body, or an empty string if anything fails.</returns>
public static string GetUrltoHtml(string Url, string type)
{
    try
    {
        System.Net.WebRequest wReq = System.Net.WebRequest.Create(Url);
        // Dispose the response, its stream and the reader so the connection is
        // released even when decoding or reading throws.
        using (System.Net.WebResponse wResp = wReq.GetResponse())
        using (System.IO.Stream respStream = wResp.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, System.Text.Encoding.GetEncoding(type)))
        {
            return reader.ReadToEnd();
        }
    }
    catch (System.Exception)
    {
        // Best-effort helper: every failure is reported to the caller as an empty result.
        return "";
    }
}
Copy code
Post method
Copy code
/// <summary>
/// Sends <paramref name="strPostdata"/> to <paramref name="URL"/> as a form-encoded
/// POST request and returns the response body as text.
/// </summary>
/// <param name="URL">Address to post to.</param>
/// <param name="strPostdata">Request body; it is encoded with <c>Encoding.Default</c>.</param>
/// <param name="strEncoding">Name of the character encoding used to decode the response.</param>
/// <returns>The decoded response body.</returns>
public string OpenReadWithHttps(string URL, string strPostdata, string strEncoding)
{
    // Only the response is decoded with strEncoding; the body uses the default encoding.
    Encoding encoding = Encoding.Default;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
    // HTTP method tokens are case-sensitive, so send the canonical upper-case verb.
    request.Method = "POST";
    request.Accept = "text/html, application/xhtml+xml, */*";
    request.ContentType = "application/x-www-form-urlencoded";
    byte[] buffer = encoding.GetBytes(strPostdata);
    request.ContentLength = buffer.Length;
    // The request stream must be closed before the response is read.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding(strEncoding)))
    {
        return reader.ReadToEnd();
    }
}
Copy code
This first technique is the entry-level one. Features:
1. The simplest and most intuitive approach; an introductory lesson.
2. Suitable for plain-text pages that can be accessed without logging in and without any verification.
3. The data type obtained is an HTML document.
4. The request method is Get/Post
2. The second technique: given a URL address, obtain information from a web page that requires certificate verification.
First look at the code
Get method
Copy code
/// <summary>
/// Server-certificate validation callback that accepts every certificate.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="certificate">Certificate presented by the server; not inspected.</param>
/// <param name="chain">Certificate chain; not inspected.</param>
/// <param name="errors">Policy errors found by the framework; ignored.</param>
/// <returns>Always <c>true</c>.</returns>
public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
{
    // Always accept. WARNING: this skips server certificate validation entirely,
    // so it must only be used against hosts you already trust.
    return true;
}
/// <summary>
/// Requests <paramref name="Url"/> with a client certificate attached and returns
/// the response body decoded as UTF-8. Server certificate validation is delegated
/// to <c>CheckValidationResult</c>.
/// </summary>
/// <param name="Url">Address to request.</param>
/// <returns>The response body, or the text "Runtime Error" if any step throws.</returns>
public string GetUrltoHtml(string Url)
{
    try
    {
        // Must be installed before the connection is created. The callback is a
        // process-wide setting on ServicePointManager, not a per-request one.
        ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
        // Create an HTTP request for the specified URL.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
        // Load the client certificate from the application start-up directory and attach it.
        X509Certificate objx509 = new X509Certificate(Application.StartupPath + "\\123.cer");
        request.ClientCertificates.Add(objx509);
        // Dispose response, stream and reader even when reading fails.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader sReader = new StreamReader(responseStream, Encoding.GetEncoding("utf-8")))
        {
            return sReader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Failures are reported through this sentinel string rather than an exception.
        return "Runtime Error";
    }
}
Copy code
Post method
Copy code
/// <summary>
/// Server-certificate validation callback that accepts every certificate.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="certificate">Certificate presented by the server; not inspected.</param>
/// <param name="chain">Certificate chain; not inspected.</param>
/// <param name="errors">Policy errors found by the framework; ignored.</param>
/// <returns>Always <c>true</c>.</returns>
public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
{
    // Always accept. WARNING: this skips server certificate validation entirely,
    // so it must only be used against hosts you already trust.
    return true;
}
/// <summary>
/// Sends <paramref name="strPostdata"/> to <paramref name="URL"/> as a form-encoded
/// POST request with a client certificate attached, and returns the response body.
/// Server certificate validation is delegated to <c>CheckValidationResult</c>.
/// </summary>
/// <param name="URL">Address to post to.</param>
/// <param name="strPostdata">Request body; it is encoded with <c>Encoding.Default</c>.</param>
/// <param name="strEncoding">Name of the character encoding used to decode the response.</param>
/// <returns>The decoded response body.</returns>
public string OpenReadWithHttps(string URL, string strPostdata, string strEncoding)
{
    // Must be installed before the connection is created. The callback is a
    // process-wide setting on ServicePointManager, not a per-request one.
    ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
    Encoding encoding = Encoding.Default;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
    // Load the client certificate from the application start-up directory.
    X509Certificate objx509 = new X509Certificate(Application.StartupPath + "\\123.cer");
    // Start with an empty cookie container so cookies set by the server are tracked.
    request.CookieContainer = new CookieContainer();
    // Attach the client certificate to the request.
    request.ClientCertificates.Add(objx509);
    // HTTP method tokens are case-sensitive, so send the canonical upper-case verb.
    request.Method = "POST";
    request.Accept = "text/html, application/xhtml+xml, */*";
    request.ContentType = "application/x-www-form-urlencoded";
    byte[] buffer = encoding.GetBytes(strPostdata);
    request.ContentLength = buffer.Length;
    // The request stream must be closed before the response is read.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding(strEncoding)))
    {
        return reader.ReadToEnd();
    }
}
Copy code
This technique gets you through the door: any page that requires certificate verification can be accessed this way. I use the certificate-validation callback. Certificate validation is performed by the client, so we can supply our own method to do it. Some people will say they do not know how to validate a certificate. It is actually very simple: the code is our own, so why make things hard for ourselves? Just return true, so validation always passes and the certificate can effectively be ignored. Features:
1. A small hurdle on the way in; a beginner's lesson.
2. Suitable for plain-text pages that do not require login but do require certificate verification.
3. The data type obtained is an HTML document.
4. The request method is Get/Post
3. The third technique: given a URL address, obtain information from a web page that requires login.
Let's first analyze this type of page. A page that requires login is really another kind of verification: it verifies whether the client is logged in and holds the corresponding credentials, and being logged in means the SessionID must be verified. Every page that requires login performs this check. So our first step is to obtain the data in the cookie, including the SessionID. There are many ways to get it; with IE9 or the Firefox browser it is very easy. You can refer to my article:
"An example of capturing mobile phone number attribution from the ho123 web page", which describes the IE9 approach in detail.
Once we have the cookie information from after login, accessing the corresponding page is very simple: we just send the local cookie information along with the request.
Look at the code
Get method
View Code
/// <summary>
/// Requests <paramref name="Url"/> with a GET request that carries a fixed set of
/// cookies, a Referer header and a browser User-Agent, and returns the response
/// body decoded as gb2312.
/// </summary>
/// <param name="Url">Address to request.</param>
/// <returns>The response body, or the text "Runtime Error" if any step throws.</returns>
public string GetUrltoHtml(string Url)
{
    try
    {
        // Create an HTTP request for the specified URL.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
        request.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; BOIE9; ZHCN)";
        request.Method = "GET";
        request.Accept = "*/*";
        // Only needed when the target page checks where the request came from.
        request.Referer = "http://sufei.cnblogs.com";
        // Cookies captured from a logged-in session; the "key"/"value" entries are placeholders.
        CookieContainer objcok = new CookieContainer();
        objcok.Add(new Uri("http://sufei.cnblogs.com"), new Cookie("key", "value"));
        objcok.Add(new Uri("http://sufei.cnblogs.com"), new Cookie("key", "value"));
        objcok.Add(new Uri("http://sufei.cnblogs.com"), new Cookie("sidi_sessionid", "360A748941D055BEE8C960168C3D4233"));
        request.CookieContainer = objcok;
        // Keep the underlying connection open for reuse.
        request.KeepAlive = true;
        // Dispose response, stream and reader even when reading fails.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader sReader = new StreamReader(responseStream, Encoding.GetEncoding("gb2312")))
        {
            return sReader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Failures are reported through this sentinel string rather than an exception.
        return "Runtime Error";
    }
}
Post method.
View Code
/// <summary>
/// Sends <paramref name="strPostdata"/> to <paramref name="URL"/> as a form-encoded
/// POST request that carries a fixed set of cookies, and returns the response body
/// decoded as UTF-8.
/// </summary>
/// <param name="URL">Address to post to.</param>
/// <param name="strPostdata">Request body; it is encoded with <c>Encoding.Default</c>.</param>
/// <returns>The decoded response body.</returns>
public string OpenReadWithHttps(string URL, string strPostdata)
{
    Encoding encoding = Encoding.Default;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
    // HTTP method tokens are case-sensitive, so send the canonical upper-case verb.
    request.Method = "POST";
    request.Accept = "text/html, application/xhtml+xml, */*";
    request.ContentType = "application/x-www-form-urlencoded";
    // Cookies captured from a logged-in session; the "key"/"value" entries are placeholders.
    CookieContainer objcok = new CookieContainer();
    objcok.Add(new Uri("http://sufei.cnblogs.com"), new Cookie("key", "value"));
    objcok.Add(new Uri("http://sufei.cnblogs.com"), new Cookie("key", "value"));
    objcok.Add(new Uri("http://sufei.cnblogs.com"), new Cookie("sidi_sessionid", "360A748941D055BEE8C960168C3D4233"));
    request.CookieContainer = objcok;
    byte[] buffer = encoding.GetBytes(strPostdata);
    request.ContentLength = buffer.Length;
    // The request stream must be closed before the response is read.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }
    // Dispose the response and reader so the connection is returned to the pool.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding("utf-8")))
    {
        return reader.ReadToEnd();
    }
}
Features:
1. Still fairly basic material, but once you have practiced it you will have gained some real skill.
2. Adapt to pages that require login to access.
3. The data type obtained is an HTML document.
4. The request method is Get/Post
To sum up, the basic skills are all covered in the parts above. Anything more advanced is a combination of these basic skills.
For example:
1. First use the Get or Post method to log in and obtain the cookie, then visit the page to get the information. This is just a combination of the techniques above.
Here you need to do this step after the request:
response.Cookies
This property gives you the cookies returned by the request you just made. In the earlier methods we constructed the cookies ourselves; here you can take them from the response and pass them directly to those methods.
2. If we come across a web page that requires both login and certificate verification, this is also very simple: combine the methods above.
The code below uses Get as an example; the Post version works the same way.
View Code
/// <summary>
/// Requests <paramref name="Url"/> with both a client certificate and a fixed set of
/// cookies attached, and returns the response body decoded as UTF-8. Server
/// certificate validation is delegated to <c>CheckValidationResult</c>.
/// </summary>
/// <param name="Url">Address to request.</param>
/// <returns>The response body, or the text "Runtime Error" if any step throws.</returns>
public string GetUrltoHtml(string Url)
{
    try
    {
        // Must be installed before the connection is created. The callback is a
        // process-wide setting on ServicePointManager, not a per-request one.
        ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
        // Create an HTTP request for the specified URL.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
        // Load the client certificate from the application start-up directory and attach it.
        X509Certificate objx509 = new X509Certificate(Application.StartupPath + "\\123.cer");
        request.ClientCertificates.Add(objx509);
        // Cookies captured from a logged-in session; the "key"/"value" entries are placeholders.
        CookieContainer objcok = new CookieContainer();
        objcok.Add(new Uri("http://www.cnblogs.com"), new Cookie("key", "value"));
        objcok.Add(new Uri("http://www.cnblogs.com"), new Cookie("key", "value"));
        objcok.Add(new Uri("http://www.cnblogs.com"), new Cookie("sidi_sessionid", "360A748941D055BEE8C960168C3D4233"));
        request.CookieContainer = objcok;
        // Dispose response, stream and reader even when reading fails.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader sReader = new StreamReader(responseStream, Encoding.GetEncoding("utf-8")))
        {
            return sReader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Failures are reported through this sentinel string rather than an exception.
        return "Runtime Error";
    }
}
3. If we encounter a page that verifies the source of the request, the situation is as follows. Some programmers anticipate that you might use a program to fetch their page automatically, so to prevent this they verify the page source: requests that do not come from their own page or domain name are rejected, and some sites check the source IP directly. The referrer check can be passed with the following statement, because this address can be forged directly.
request.Referer ="http://sufei.cnblogs.com";
This one is very simple because the address can be modified directly. But if the source URL is verified on the server itself, we would have to modify the packets, which is rather difficult and is not discussed here.
4. Some supporting methods to use together with these examples.
Method of filtering HTML tags
View Code
/// <summary>
/// Converts an HTML fragment to plain text: paragraph boundaries and
/// self-closing <c>br</c> tags become line breaks, double quotes are replaced,
/// and all remaining tags are removed.
/// </summary>
/// <param name="stringToStrip">HTML content to clean.</param>
/// <returns>The text with markup removed.</returns>
public static string StripHTML(string stringToStrip)
{
    // A closing paragraph tag directly followed by an opening one becomes a blank line.
    stringToStrip = Regex.Replace(stringToStrip, "</p(?:\\s*)>(?:\\s*)<p(?:\\s*)>", "\n\n", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Only the self-closing form is turned into a newline; a bare <br> is
    // simply removed by the tag-stripping pass below.
    stringToStrip = Regex.Replace(stringToStrip, "<br(?:\\s*)/>", "\n", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Replace every double quote with the two-character substitute.
    stringToStrip = Regex.Replace(stringToStrip, "\"", "‘‘", RegexOptions.IgnoreCase | RegexOptions.Compiled);
    // Finally drop every remaining tag.
    stringToStrip = StripHtmlXmlTags(stringToStrip);
    return stringToStrip;
}
/// <summary>
/// Removes everything that looks like a tag, i.e. a "&lt;", one or more
/// characters other than "&gt;", and a closing "&gt;".
/// </summary>
/// <param name="content">Text that may contain HTML or XML tags.</param>
/// <returns>The text with all such tags removed.</returns>
private static string StripHtmlXmlTags(string content)
{
    return Regex.Replace(content, "<[^>]+>", "", RegexOptions.IgnoreCase | RegexOptions.Compiled);
}
URL conversion method
Copy code
#region Conversion URL
/// <summary>
/// Decodes a URL-encoded string using <c>Encoding.Default</c>.
/// </summary>
/// <param name="text">URL-encoded text.</param>
/// <returns>The decoded text.</returns>
public static string URLDecode(string text)
{
    return HttpUtility.UrlDecode(text, Encoding.Default);
}
/// <summary>
/// URL-encodes a string using <c>Encoding.Default</c>.
/// </summary>
/// <param name="text">Text to encode.</param>
/// <returns>The URL-encoded text.</returns>
public static string URLEncode(string text)
{
    return HttpUtility.UrlEncode(text, Encoding.Default);
}
#endregion
Copy code
Here is a practical example: using IP138 to look up the attribution (home location) of a mobile phone number. It appeared in my previous article, and I am including it again here for your convenience. Techniques in this area are very interesting, and I hope everyone will offer more suggestions; I am sure there are more, better, and more complete methods. This one is only a reference. Thanks for the support.
Upper example
Copy code
/// <summary>
/// Looks up a mobile phone number on ip138.com and extracts the fields that
/// follow the number in the result page.
/// </summary>
/// <param name="number">Mobile phone number to look up.</param>
/// <returns>
/// Four trimmed fields taken from the page, or <c>null</c> when fewer than five
/// fragments are found or any step throws. The original documentation describes
/// the elements as: 0 attribution, 1 card type, 2 area code, 3 postal code.
/// </returns>
public static string[] getTelldate(string number)
{
    try
    {
        // NOTE(review): every single-argument GetUrltoHtml shown in this file is an
        // instance method; a static overload is presumably defined elsewhere - confirm.
        string strSource = GetUrltoHtml("http://www.ip138.com:8080/search.asp?action=mobile&mobile=" + number.Trim());
        // Drop everything before the first occurrence of the number in the page.
        strSource = strSource.Substring(strSource.IndexOf(number));
        strSource = StripHTML(strSource);
        // Remove all whitespace and the leftover end of an HTML comment.
        strSource = strSource.Replace("\r", "");
        strSource = strSource.Replace("\n", "");
        strSource = strSource.Replace("\t", "");
        strSource = strSource.Replace(" ", "");
        strSource = strSource.Replace("-->", "");
        // NOTE(review): spaces were removed just above, so the separators that contain a
        // space can never match; these labels look like translated page captions -
        // confirm them against the text the page really contains.
        string[] strnumber = strSource.Split(new string[] { "attribute", "card type", "zip code", "area code", "more detail", "card number" }, StringSplitOptions.RemoveEmptyEntries);
        string[] strnumber1 = null;
        if (strnumber.Length > 4)
        {
            // Fragment 0 is the text before the first label; the next four are the fields.
            strnumber1 = new string[] { strnumber[1].Trim(), strnumber[2].Trim(), strnumber[3].Trim(), strnumber[4].Trim() };
        }
        return strnumber1;
    }
    catch (Exception)
    {
        // Any failure (network, number not found in the page, ...) yields null.
        return null;
    }
}
Copy code
This example is not ideal and some parts could be simplified; this interface could also be consumed directly as XML. My focus here, however, is to let newcomers see the methods and the way of thinking.
The fourth technique: access through a Socket.
-------------------------------------------------- -------------------------------------------------- --------
Copy code
/// <summary>
/// Opens a TCP connection to www.110.cn on port 3121, sends the request text and
/// returns everything the server sends back until it closes the connection.
/// </summary>
/// <param name="strSMSRequest">
/// Request text to send. It is trimmed and sent as UTF-8 exactly as given, so the
/// caller must include any protocol framing the server expects.
/// </param>
/// <returns>
/// The response decoded as ASCII, or a message starting with
/// "Fail to connect server" when the connection cannot be established.
/// </returns>
private static string SMSrequest(string strSMSRequest)
{
    byte[] data = new byte[1024];
    // Resolve the host and take the first IPv4 address, because the socket below
    // is created for AddressFamily.InterNetwork.
    IPAddress[] addresses = Dns.GetHostAddresses("www.110.cn");
    IPAddress ip = System.Array.Find(addresses, a => a.AddressFamily == AddressFamily.InterNetwork);
    if (ip == null)
    {
        throw new SocketException((int)SocketError.HostNotFound);
    }
    // The service listens on port 3121, not the HTTP default of 80.
    IPEndPoint ipEnd = new IPEndPoint(ip, 3121);
    // TCP stream socket; the using block closes it on every exit path.
    using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
    {
        try
        {
            socket.Connect(ipEnd);
        }
        catch (SocketException ex)
        {
            return "Fail to connect server\r\n" + ex.ToString();
        }
        // Submit the request. Previously an empty buffer was sent and the
        // parameter was ignored.
        byte[] ms = System.Text.Encoding.UTF8.GetBytes(strSMSRequest.Trim());
        socket.Send(ms);
        // Receive until the peer closes the connection (Receive returns 0).
        // If the page declares another charset such as gb2312, decode with the
        // matching Encoding instead of ASCII.
        StringBuilder strSms = new StringBuilder();
        int recv;
        while ((recv = socket.Receive(data)) > 0)
        {
            strSms.Append(Encoding.ASCII.GetString(data, 0, recv));
        }
        socket.Shutdown(SocketShutdown.Both);
        return strSms.ToString();
    }
}
C# HttpWebRequest techniques: getting web page information from a URL address