代碼如下
$cookie_file = tempnam('./temp','cookie');$login_url = 'http://211.64.47.129/default_ysdx.aspx';$post_fields = '__VIEWSTATE=dDw1MjQ2ODMxNzY7Oz7xlHJHd0KfeVRA2p7BXNto118wbQ==&TextBox1=學號&TextBox2=密碼';$ch = curl_init($login_url);curl_setopt($ch, CURLOPT_HEADER, 0);curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);curl_setopt($ch, CURLOPT_POST, 1);curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);curl_exec($ch);curl_close($ch);$url='http://211.64.47.129/xs_main.aspx?xh=學號';$ch = curl_init($url);curl_setopt($ch, CURLOPT_HEADER, 0);curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);$contents = curl_exec($ch);preg_match("/
(.*)<\/li>/",$contents,$arr);echo $arr[1];curl_close($ch);
但是最後卻回到了登入的介面,小白求大神解答
回複內容:
代碼如下
$cookie_file = tempnam('./temp','cookie');$login_url = 'http://211.64.47.129/default_ysdx.aspx';$post_fields = '__VIEWSTATE=dDw1MjQ2ODMxNzY7Oz7xlHJHd0KfeVRA2p7BXNto118wbQ==&TextBox1=學號&TextBox2=密碼';$ch = curl_init($login_url);curl_setopt($ch, CURLOPT_HEADER, 0);curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);curl_setopt($ch, CURLOPT_POST, 1);curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);curl_exec($ch);curl_close($ch);$url='http://211.64.47.129/xs_main.aspx?xh=學號';$ch = curl_init($url);curl_setopt($ch, CURLOPT_HEADER, 0);curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);$contents = curl_exec($ch);preg_match("/
(.*)<\/li>/",$contents,$arr);echo $arr[1];curl_close($ch);
但是最後卻回到了登入的介面,小白求大神解答
根據樓主的要求, 配合我自己寫的 HttpClient
的類, 編寫的代碼如下, 但因為沒有測試帳號, 所以測試時使用的帳號和密碼為 test
, 返回的結果是登陸失敗, 樓主只需要修改代碼裡的帳號和密碼, 應該就可以了.
HTTP
請求過程中的 Cookie
由 HttpClient/CURL
自動處理.
樓主發的代碼應該是沒問題的, 感覺應該是沒有提交 RadioButtonList1
和 Button1
這兩個資料.
還有那個__VIEWSTATE
雖然看起來是固定的, 但為了保險期間, 還是應該從頁面中擷取到之後, 再提交登陸.
附代碼:
ch = curl_init(); curl_setopt($this->ch, CURLOPT_USERAGENT, 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; QQDownload 685; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)');//UA curl_setopt($this->ch, CURLOPT_TIMEOUT, 40); curl_setopt($this->ch, CURLOPT_FOLLOWLOCATION, TRUE); curl_setopt($this->ch, CURLOPT_AUTOREFERER, true); curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, TRUE); curl_setopt($this->ch, CURLOPT_COOKIEJAR, $cookie_jar); curl_setopt($this->ch, CURLOPT_COOKIEFILE, $cookie_jar); } function __destruct(){ curl_close($this->ch); } final public function setReferer($ref=''){ if($ref != ''){ curl_setopt($this->ch, CURLOPT_REFERER, $ref); } } final public function Get($url, $header=false, $nobody=false){ curl_setopt($this->ch, CURLOPT_POST, false); curl_setopt($this->ch, CURLOPT_URL, $url); curl_setopt($this->ch, CURLOPT_HEADER, $header); curl_setopt($this->ch, CURLOPT_NOBODY, $nobody); return curl_exec($this->ch); } final public function Post($url, $data=array(), $header=false, $nobody=false){ curl_setopt($this->ch, CURLOPT_URL, $url); curl_setopt($this->ch, CURLOPT_HEADER, $header); curl_setopt($this->ch, CURLOPT_NOBODY, $nobody); curl_setopt($this->ch, CURLOPT_POST, true); curl_setopt($this->ch, CURLOPT_POSTFIELDS, http_build_query($data)); return curl_exec($this->ch); }}const Login_URL = 'http://211.64.47.129/default_ysdx.aspx';$http = new HttpClient(tempnam('./temp','cookie'));$html = $http->Get(Login_URL);//先請求登陸頁面, 擷取 __VIEWSTATEpreg_match('/name="__VIEWSTATE" value="(.+?)"/', $html, $vs);if(count($vs) !== 2){ echo '擷取viewstate失敗'; exit();}//構造登陸時的資料$data = array( '__VIEWSTATE'=>$vs[1],//__VIEWSTATE 'TextBox1'=>'username',//修改此處的使用者 'TextBox2'=>'password',//和密碼 'RadioButtonList1'=>'學生',//以及身份類型 'Button1'=>' 登入 ');$html = $http->Post(Login_URL, $data);preg_match('/language=\'javascript\'>alert\(\'(.+?)\'\);/', $html, $err);//檢測是否出錯, 如果有出錯, 則顯示錯誤資訊, 然後退出if(count($err) === 2){ echo $err[1]; exit();}$sn = '123123';//學號$html = $http->Get('http://211.64.47.129/xs_main.aspx?xh='. $sn);preg_match('/
\s*(.*)<\/li>/', $html, $result);var_dump($result);
https://github.com/lndj/Lcrawl/tree/dev
一隻優雅的正方教務系統爬蟲。