To use multithreading in PHP, you need PHP 5.3 or later with the pthreads extension installed; pthreads is what gives PHP thread support. Note that pthreads only works with a thread-safe (ZTS) build of PHP. For step-by-step installation instructions, search online (for example, on Baidu).
pthreads extension (source and downloads): https://github.com/krakjoe/pthreads
PHP Manual: http://php.net/manual/zh/book.pthreads.php
Once the extension is installed, you can use multiple threads. The following example code crawls Baidu online-storage (Baidu Pan) resource pages found through search results, fetching each resource page in its own thread:
Start ();} foreach ($ thread_array as $ thread_array_key => $ thread_array_value) {// check whether the thread execution ends while ($ thread_array [$ thread_array_key]-> isRunning ()) {usleep (10);} if ($ thread_array [$ thread_array_key]-> join () {// if the execution ends, get the result $ temp = $ thread_array [$ thread_array_key]-> data; if ($ temp! = Null) $ infos ['res'] [] = $ temp; }}$ infos ['Page'] = $ ret ['Page']; $ infos ['status'] = "1";} else $ infos = null; return $ infos ;} /*** retrieve Baidu search result list URL ** @ param string $ url * search result page URL * @ param int $ format * Default $ format = 0, get the default address; $ format = 1 get the real address after the jump * @ return NULL multitype: array () */function BaiduSRLinksGet ($ url, $ format = 0) {$ html = CurlLoad :: htmlGet ($ url); // Obtain the page if ($ html = = Null) return null; try {preg_match_all ("/" url ":"(? <Links>. *) "}/", $ html, $ rets); // filter if (! Array_key_exists ('links ', $ rets) // If the array does not contain the links key name, return null for failed retrieval; $ ret = array (); if ($ format = 1) {$ number = count ($ rets ['link']); for ($ I = 0; $ I <$ number; $ I ++) {$ headr_temp = CurlLoad: Get_Headers ($ rets ['link'] [$ I], 1 ); // Obtain the real address through headr if (array_key_exists ("Location", $ headr_temp) $ ret ['link'] [$ I] = $ headr_temp ['location']; else $ ret ['link'] = $ rets ['link'];} else $ ret ['Lin Ks '] = $ rets ['link']; preg_match_all ('/href = "? /S? Wd = site % 3Apan.baidu.com % 20 (?
. + ?) & Ie = UTF-8 ">/', $ html, $ out); unset ($ out ['URL'] [0]); $ number = count ($ out ['URL']); for ($ I = 1; $ I <$ number; $ I ++) {preg_match_all ('/& pn = (. *)/', $ out ['URL'] [$ I], $ temp ); $ ret ['Page'] [$ temp [1] [0]/10] = base64_encode ($ out ['URL'] [$ I]);} return $ ret;} catch (Exception $ e) {WriteLog ($ e); return null ;}} /*** obtain Baidu online storage resource information ** @ param string $ url * URL of the online storage resource page * @ return NULL array */function PanInfoGet ($ u Rl) {$ html = CurlLoad: HtmlGet ($ url); // Obtain the page if ($ html = null) return null; try {if (preg_match_all ("/file name :(?
. *) File size :(?
. *) Sharer :(?
. *) Sharing Time :(?
. *) Number of downloads :(?
[0-9] +)/", $ html, $ ret) = 0) return null; $ rets ['name'] = $ ret ['name'] [0]; $ rets ['size'] = $ ret ['size'] [0]; $ rets ['user'] = $ ret ['user'] [0]; $ rets ['Date'] = $ ret ['Date'] [0]; $ rets ['Number'] = $ ret ['Number'] [0]; $ rets ['link'] = $ url; return $ rets ;} catch (Exception $ e) {WriteLog ($ e); return null ;}} function WriteLog ($ str) {$ file = fopen (".. /error. log "," a + "); fwrite ($ file," Warning :". date ("Y/m/ D H: I: s "). ":". $ str. "rn"); fclose ($ file);}/*** multi-Thread capturing object * @ author MuXi **/class baidu_thread_run extends Thread {public $ url; public $ data; public function _ construct ($ url) {$ this-> url = $ url;} public function run () {if ($ url = $ this-> url )) {$ this-> data = PanInfoGet ($ url); // thread execution method }}?>