Capturing web data by simulating a browser session that logs in with a verification code
Capturing data after simulating a web login that requires a verification code
1. Obtain the verification code. (1) Put a PictureBox (imgValidate) on the form to display the downloaded verification-code image.
(2) Use the browser's developer tools (F12 in Firefox) to find the URL of the verification-code image.
/// <summary>
/// Downloads the verification-code image and displays it in <c>imgValidate</c>.
/// </summary>
/// <remarks>
/// Reads <c>strValidCode</c> to build the image URL, so that field must be populated first.
/// On success, <c>cookies</c> holds the container used for the request and
/// <c>strCookies</c> holds the same cookies rendered as a Cookie header string.
/// </remarks>
/// <exception cref="WebException">The HTTP request failed.</exception>
private void GetValidateImage()
{
    // The strValidCode value from the verification-code page must be obtained first.
    string strUrl = "http://www.xxx.com/ValidateCodePicture.aspx?Key=" + strValidCode;

    // Start from an empty container; it is attached to the request below, so it
    // ends up holding whatever cookies the server sets in its response.
    cookies = new CookieContainer();

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
    request.Method = "GET";
    request.CookieContainer = cookies;
    request.KeepAlive = true;
    request.ContentType = "text/html";
    // Simulate Google Chrome.
    request.UserAgent =
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
    request.Headers.Add("x-requested-with", "XMLHttpRequest");
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    // Enabling automatic decompression also makes HttpWebRequest send the matching
    // Accept-Encoding header, so it is not added by hand.
    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

    // Copy the body into a growable buffer instead of sizing an array from
    // response.ContentLength, which is -1 when the length is unknown
    // (chunked or decompressed responses).
    MemoryStream ms = new MemoryStream();
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        stream.CopyTo(ms);
    }
    ms.Position = 0;

    // Convert the cookies into a header string.
    strCookies = cookies.GetCookieHeader(request.RequestUri);

    // The MemoryStream is intentionally left open: GDI+ requires the source stream
    // to stay open for the lifetime of a Bitmap created from it.
    Image previousImage = imgValidate.Image;
    imgValidate.Image = new Bitmap(ms);
    if (previousImage != null)
    {
        // Release the image shown before this refresh.
        previousImage.Dispose();
    }
}
2. Get content that is assigned by JavaScript. On some pages the control values are set by script, so they do not appear when you view the page source. In that case use the following approach.
Load the page in the built-in WinForms WebBrowser control, then read the value of the control through webBrowser1.Document, for example:
// Read the rendered HTML of the hidden field from the loaded document.
string strMsg2 = webBrowser1.Document.GetElementById("hdValidateCodeID").OuterHtml;
3. Obtain the parameters to submit. ASP.NET WebForms pages also expect the "__EVENTTARGET", "__EVENTARGUMENT" and "__VIEWSTATE" parameters; you can see them in the developer tools under Network > Params.
One option is to fetch the page source with an HttpWebRequest and then parse it.
Here we instead read the values from the page already loaded in the WebBrowser control.
/// <summary>
/// Reads the hidden <c>__VIEWSTATE</c> and <c>hdValidateCodeID</c> fields from the page
/// loaded in <c>webBrowser1</c>, stores them in <c>strViewState</c> / <c>strValidCode</c>,
/// shows them in the two text boxes, and copies the browser's cookies into
/// <c>myCookieContainer</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// No document is loaded, or one of the hidden fields is missing from the page.
/// </exception>
private void GetViewState()
{
    // <INPUT id=__VIEWSTATE type=hidden value=/wepdwukmt1_ntk3mjg2n2rk name=__VIEWSTATE>
    strViewState = GetHiddenFieldValue("__VIEWSTATE");
    // <INPUT id=hdValidateCodeID type=hidden value=c1b52d3a-1f8b-1dc4-0d44-32a4b46ef8af name=hdValidateCodeID>
    strValidCode = GetHiddenFieldValue("hdValidateCodeID");

    txtValidCode.Text = strValidCode;
    txtViewState.Text = strViewState;

    // The browser exposes its cookies as one "name=value; name=value" string;
    // convert each pair to a Cookie and put it into the CookieContainer.
    string cookieStr = webBrowser1.Document.Cookie;
    if (string.IsNullOrEmpty(cookieStr))
    {
        return;
    }

    foreach (string pair in cookieStr.Split(';'))
    {
        // Split on the first '=' only, so values that themselves contain '='
        // (for example base64 padding) are kept whole.
        string[] cookieNameValue = pair.Split(new[] { '=' }, 2);
        if (cookieNameValue.Length != 2)
        {
            continue;
        }

        string cookieName = cookieNameValue[0].Trim();
        if (cookieName.Length == 0)
        {
            continue;
        }

        try
        {
            Cookie ck = new Cookie(cookieName, cookieNameValue[1].Trim());
            // The domain must be set: CookieContainer.Add rejects a cookie without one.
            ck.Domain = "XXX.com";
            myCookieContainer.Add(ck);
        }
        catch (CookieException)
        {
            // Best effort: skip a pair that cannot be represented as a Cookie
            // (for example an invalid name or value) and keep the remaining ones.
        }
    }
}

/// <summary>
/// Returns the trimmed <c>value</c> attribute of the element with the given id in the
/// document loaded in <c>webBrowser1</c>.
/// </summary>
/// <param name="elementId">The id of the element to read.</param>
/// <returns>The attribute value, or an empty string when the attribute has no value.</returns>
/// <exception cref="InvalidOperationException">
/// No document is loaded, or no element with that id exists.
/// </exception>
private string GetHiddenFieldValue(string elementId)
{
    HtmlDocument document = webBrowser1.Document;
    if (document == null)
    {
        throw new InvalidOperationException("No document is loaded in webBrowser1.");
    }

    HtmlElement element = document.GetElementById(elementId);
    if (element == null)
    {
        throw new InvalidOperationException("Element '" + elementId + "' was not found in the loaded page.");
    }

    // Read the attribute directly instead of pattern-matching the element's OuterHtml.
    return (element.GetAttribute("value") ?? string.Empty).Trim();
}
4. Log in and keep the cookies. Submit the parameters and save the returned cookies for use in later requests.
/// <summary>
/// Posts the login form to the site using the cookies in <c>myCookieContainer</c>,
/// then stores the request's cookie container in <c>cookies</c> and updates the UI.
/// </summary>
/// <remarks>
/// Reads <c>txtUserName</c>, <c>txtPassword</c>, <c>txtValidate</c>,
/// <c>strValidCode</c> and <c>strViewState</c> to build the form body.
/// NOTE(review): the response body is read but not inspected, so the UI reports
/// "logged on" whenever the request completes without throwing. Confirm how the
/// site signals a rejected login and check for it here.
/// </remarks>
/// <exception cref="WebException">The HTTP request failed.</exception>
private void Login()
{
    // Reset first: if the request below throws, cookies is left as an empty
    // container rather than holding values from an earlier call.
    cookies = new CookieContainer();

    string strUrl = "http://www.xxx.com/Login.aspx"; // login page
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);

    // Set request args.
    request.Method = "POST";
    request.CookieContainer = myCookieContainer;
    request.KeepAlive = true;
    request.ContentType = "application/x-www-form-urlencoded";
    // Simulate Google Chrome.
    request.UserAgent =
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
    request.Headers.Add("x-requested-with", "XMLHttpRequest");
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    // Enabling automatic decompression also makes HttpWebRequest send the matching
    // Accept-Encoding header, so it is not added by hand.
    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

    // Every value is URL-encoded: the base64 view state contains '+', '/' and '=',
    // and user input may contain '&', any of which would corrupt an unencoded body.
    // The field names and fixed values must match the POST captured in the
    // browser's developer tools for the target site.
    string postData = string.Format(
        "txtUserName={0}&txtPassword={1}&txtValidateCode={2}&hdValidateCodeID={3}&ddlLanguage=CN&btnLogin=login&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE={4}",
        WebUtility.UrlEncode(txtUserName.Text),
        WebUtility.UrlEncode(txtPassword.Text),
        WebUtility.UrlEncode(txtValidate.Text),
        WebUtility.UrlEncode(strValidCode),
        WebUtility.UrlEncode(strViewState));
    byte[] postDataBytes = Encoding.UTF8.GetBytes(postData);
    request.ContentLength = postDataBytes.Length;
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(postDataBytes, 0, postDataBytes.Length);
    }

    string strMsg;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        strMsg = reader.ReadToEnd();
    }

    // Save the cookies; they can be reused when requesting other pages.
    cookies = request.CookieContainer;
    lbLogin.Text = "logged on";
    btnSearchResume.Enabled = true;
}