代碼如下:
";
// NOTE(review): the start of this script is missing from this excerpt -- the
// opening PHP tag, the first echo statement and the definitions of
// $login_url, $verify_code_url and $cookie_file all come before this point.

// Request the login page and store whatever cookies the server sets in
// $cookie_file. The response body itself is not used.
$curl = curl_init();
$timeout = 5;
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file); // fetch the cookies and store them
curl_exec($curl);
curl_close($curl);

// 3. Fetch the captcha image.
echo "COOKIE擷取完成,正在取驗證碼...
";
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $verify_code_url);
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file);  // save cookies
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file); // send the stored cookies
curl_setopt($curl, CURLOPT_HEADER, 0);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$img = curl_exec($curl);
curl_close($curl);
if ($img === false) {
    exit('Failed to download the captcha image.');
}

// Build one base path and derive every captcha file name from it, so the
// image written here, the file handed to tesseract and the text file read
// back afterwards always refer to the same location.
$codename = time();
$image_base = "/home/wwwroot/default/tesseract/Test/images/$codename";
if (file_put_contents("$image_base.png", $img) === false) {
    exit('Failed to save the captcha image.');
}

// Start recognising the captcha.
echo "驗證碼取出完成,正在休眠,正在識別驗證碼...
";
// The second argument is the output base name; the result is read back from
// that name with ".txt" appended.
passthru("/usr/bin/tesseract $image_base.png $image_base");
$code = file_get_contents("$image_base.txt");
if ($code === false) {
    exit('The OCR result file could not be read.');
}
// OCR text output normally carries trailing whitespace (line breaks); posting
// it unchanged would make the captcha value differ from what the server expects.
$code = trim($code);
echo "驗證碼成功取出:$code
";
echo "正在準備類比登入...
";

$post_url = "http://210.32.33.91:8080/reader/redr_verify.php";
// For security, the real password is not provided here.
// http_build_query() URL-encodes every value, including the OCR result.
$post = http_build_query([
    'number' => '1111111',
    'passwd' => '111111',
    'captcha' => $code,
    'select' => 'cert_no',
    'returnUrl' => '',
]);
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $post_url);
curl_setopt($curl, CURLOPT_HEADER, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POST, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $post);
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
$result = curl_exec($curl);
curl_close($curl);

// Point the relative captcha image reference in the returned page at the
// remote host so it resolves when the page is shown from this script.
echo str_replace('captcha.php', 'http://210.32.33.91:8080/reader/captcha.php', $result);
回覆內容:
代碼如下:
";
// NOTE(review): the start of this script is missing from this excerpt -- the
// opening PHP tag, the first echo statement and the definitions of
// $login_url, $verify_code_url and $cookie_file all come before this point.

// Request the login page and store whatever cookies the server sets in
// $cookie_file. The response body itself is not used.
$curl = curl_init();
$timeout = 5;
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file); // fetch the cookies and store them
curl_exec($curl);
curl_close($curl);

// 3. Fetch the captcha image.
echo "COOKIE擷取完成,正在取驗證碼...
";
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $verify_code_url);
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file);  // save cookies
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file); // send the stored cookies
curl_setopt($curl, CURLOPT_HEADER, 0);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$img = curl_exec($curl);
curl_close($curl);
if ($img === false) {
    exit('Failed to download the captcha image.');
}

// Build one base path and derive every captcha file name from it, so the
// image written here, the file handed to tesseract and the text file read
// back afterwards always refer to the same location.
$codename = time();
$image_base = "/home/wwwroot/default/tesseract/Test/images/$codename";
if (file_put_contents("$image_base.png", $img) === false) {
    exit('Failed to save the captcha image.');
}

// Start recognising the captcha.
echo "驗證碼取出完成,正在休眠,正在識別驗證碼...
";
// The second argument is the output base name; the result is read back from
// that name with ".txt" appended.
passthru("/usr/bin/tesseract $image_base.png $image_base");
$code = file_get_contents("$image_base.txt");
if ($code === false) {
    exit('The OCR result file could not be read.');
}
// OCR text output normally carries trailing whitespace (line breaks); posting
// it unchanged would make the captcha value differ from what the server expects.
$code = trim($code);
echo "驗證碼成功取出:$code
";
echo "正在準備類比登入...
";

$post_url = "http://210.32.33.91:8080/reader/redr_verify.php";
// For security, the real password is not provided here.
// http_build_query() URL-encodes every value, including the OCR result.
$post = http_build_query([
    'number' => '1111111',
    'passwd' => '111111',
    'captcha' => $code,
    'select' => 'cert_no',
    'returnUrl' => '',
]);
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $post_url);
curl_setopt($curl, CURLOPT_HEADER, false);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POST, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $post);
curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
$result = curl_exec($curl);
curl_close($curl);

// Point the relative captcha image reference in the returned page at the
// remote host so it resolves when the page is shown from this script.
echo str_replace('captcha.php', 'http://210.32.33.91:8080/reader/captcha.php', $result);
2016/1/25 14:51更新
在 Linux 上,需要給 captcha 目錄和 cookies 目錄寫入權限。
你一步步斷點調試,看你那個程式產生的結果是多少,圖片是多少。
代碼在:https://github.com/rainwsy/sf/tree/master/library-OCR-login
更新:
你應該
1.將驗證碼存下來,跟文字結果對比下,
2.對比每次的session_id是否一致
3.CURLOPT_COOKIEJAR第一次用的時候存session_id就可以了,後面的操作用CURLOPT_COOKIEFILE來讀取session_id,其實你可以對比下幾次請求返回header頭中的session_id是否一致
我的驗證碼識別結果:
寫了個DEMO:
指出幾個問題:擷取session在擷取驗證碼的那一步一併擷取就OK,沒必要先取得session再取驗證碼。
當看到帳號密碼隱藏的時候我在想這是給校友回答的嗎?
getCaptcha();
// NOTE(review): the start of this demo is missing from this excerpt -- the
// opening PHP tag, the expression this getCaptcha() call belongs to, and the
// definitions of $captcha, $loginUrl and $cookie_file come before this point.

/* Log in */
$username = '使用者名稱';
$passwd = '密碼';
$postArray = [
    'number' => $username,
    'passwd' => $passwd,
    'captcha' => $captcha,
    'select' => 'cert_no',
    'returnUrl' => '',
];
$postData = http_build_query($postArray);
echo post($loginUrl, $postData, $cookie_file);

/**
 * Send a GET request and return the response body.
 *
 * @param string $url           Address to request.
 * @param string $cookie_file   Path of the cookie file.
 * @param bool   $isCookiesSave When true, cookies received from the server are
 *                              written to $cookie_file (CURLOPT_COOKIEJAR);
 *                              otherwise cookies already stored in
 *                              $cookie_file are sent (CURLOPT_COOKIEFILE).
 *
 * @return string|false Response body, or false when the transfer fails.
 */
function get($url, $cookie_file, $isCookiesSave = false)
{
    // Initialise the handle.
    $curl = curl_init($url);

    $header = array();
    $header[] = 'User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36';
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);

    // Do not include the response headers in the returned data.
    curl_setopt($curl, CURLOPT_HEADER, 0);

    if ($isCookiesSave) {
        curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie_file); // save cookies
    } else {
        curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);
    }

    // Return the response as a string instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    // Follow redirects and return the final page.
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);

    $info = curl_exec($curl);
    curl_close($curl);

    return $info;
}

/**
 * Send a POST request, sending the cookies stored in $cookie_file, and return
 * the response body.
 *
 * @param string       $url         Address to request.
 * @param string|array $data        Request body passed to CURLOPT_POSTFIELDS.
 * @param string       $cookie_file Path of the cookie file to read cookies from.
 *
 * @return string|false Response body, or false when the transfer fails.
 */
function post($url, $data, $cookie_file)
{
    // Initialise the handle.
    $curl = curl_init($url);

    $header = array();
    $header[] = 'User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36';
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);

    // Do not include the response headers in the returned data.
    curl_setopt($curl, CURLOPT_HEADER, 0);

    // Return the response as a string instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie_file);

    // Send the request as a POST.
    curl_setopt($curl, CURLOPT_POST, 1);

    // Request body.
    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);

    // Follow redirects and return the final page.
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);

    $response = curl_exec($curl);
    curl_close($curl);

    return $response;
}
網上的評論是 識別率低