PHP code for a multi-threaded QQ number crawler
'SET NAMES \'utf8\'', PDO::MYSQL_ATTR_COMPRESS => true, PDO::ATTR_PERSISTENT => true); */
    }

    // NOTE(review): the opening of this worker class lies outside the visible
    // source. Only token spacing was repaired in this tail, and the closing
    // brace below was restored because PHP cannot nest class declarations.
    protected function getInstance()
    {
        return self::$dbh;
    }
}

/**
 * Crawl task submitted to the Pool.
 *
 * Starting from one QQ number, it fetches that account's Qzone visitor list
 * and then follows each visitor's own list until $depth levels were reached.
 */
class Crawler extends Stackable
{
    // NOTE(review): the original mt_rand() bounds were unreadable in the
    // source; these two values are placeholders (10 ms .. 1 s) - confirm.
    const MIN_DELAY_US = 10000;
    const MAX_DELAY_US = 1000000;

    /** Level at which recursion() stops following visitor lists. */
    public $depth = 3;

    /** QQ number this task starts crawling from. */
    public $qq;

    /**
     * @param int|string $qq QQ number used as the crawl seed.
     */
    public function __construct($qq)
    {
        $this->qq = $qq;
    }

    /**
     * Thread entry point: crawls outward from the seed QQ number.
     *
     * A PDOException is appended to mobile_error.log together with the seed
     * number instead of terminating the worker.
     */
    public function run()
    {
        try {
            // The handle is fetched but not used by the code visible here.
            $dbh = $this->worker->getInstance();
            $this->recursion(array($this->qq));
        } catch (PDOException $e) {
            // The original logged the undefined $mobile and $id here.
            $error = sprintf("%s, %s\n", $this->qq, $e->getMessage());
            file_put_contents("mobile_error.log", $error, FILE_APPEND);
        }
    }

    /**
     * Visits every QQ number in $qqs and recurses into their visitor lists.
     *
     * The depth is carried as an argument rather than in shared static state,
     * so each branch unwinds correctly and sibling branches are explored to
     * the same depth as the first one.
     *
     * @param array $qqs   QQ numbers to visit at this level.
     * @param int   $level Current level; the initial call uses 1.
     */
    public function recursion($qqs, $level = 1)
    {
        printf("Level: %s\n", $level);

        // Random pause between requests.
        usleep(mt_rand(self::MIN_DELAY_US, self::MAX_DELAY_US));

        if ($level >= $this->depth) {
            return;
        }

        foreach ($qqs as $uin) {
            $lst = $this->qzone($uin);
            print_r($lst);
            $this->recursion($lst, $level + 1);
        }
    }

    /**
     * Fetches the visitor list of one Qzone account.
     *
     * @param int|string $qq Account whose visitors are requested.
     *
     * @return array The `uin` of each visitor entry; empty when the request
     *               fails or the response has no data->list.
     */
    public function qzone($qq)
    {
        $url = 'http://m.qzone.com/mqz_get_visitor?g_tk=1191852101&res_mode=0&res_uin='
            . urlencode((string) $qq)
            . '&offset=0&count=100&page=1&format=json&t=1401762986882'
            . '&sid=dODKVcYv6azjN87cxXQ5mao1xgakYjHg18c8aa5e0201%3D%3D';

        $snoopy = new Snoopy;

        // Need a proxy?
        // $snoopy->proxy_host = "my.proxy.host";
        // $snoopy->proxy_port = "8080";

        // Browser identity and referer sent with the request.
        $snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";
        $snoopy->referer = "http://m.qzone.com/";
        $snoopy->rawheaders["Pragma"] = "no-cache";

        // Client limits: at most two redirects, no off-site redirects,
        // links left unexpanded.
        $snoopy->maxredirs = 2;
        $snoopy->offsiteok = false;
        $snoopy->expandlinks = false;

        if (!$snoopy->fetchtext($url)) {
            print "Snoopy: error while fetching document: " . $snoopy->error . "\n";

            // Always hand the caller something it can iterate over.
            return array();
        }

        $results = array();
        $tmp = json_decode($snoopy->results);

        // A failed decode or a response without data->list yields no visitors.
        if ($tmp && isset($tmp->data->list) && is_array($tmp->data->list)) {
            foreach ($tmp->data->list as $entry) {
                if (isset($entry->uin)) {
                    $results[] = $entry->uin;
                }
            }
        }

        return $results;
    }
}

$pool = new Pool(100, \CrawlerWorker::class, []);

# foreach (range(1000, 100000) as $number) {
#     $pool->submit(new Crawler($number));
# }

// NOTE(review): the same seed is submitted twice - confirm whether intended.
$pool->submit(new Crawler('20140901'));
$pool->submit(new Crawler('20140901'));
// $pool->submit(new Crawler('XXX'));
// ... and so on

$pool->shutdown();
?>