Here is the idea. Google's search page cannot be opened with file_get_contents; you must simulate a browser completely, so we use PHP's cURL functions and store the cookies. Baidu is different: you can fetch the page directly with file_get_contents and then process it with a regular expression, so it is not listed here...
Here is the idea. Google's search page cannot be opened with file_get_contents; you must simulate a browser completely, so we use PHP's cURL functions and store the cookies. Baidu is different: you can fetch the page directly with file_get_contents and then process it with a regular expression, so the Baidu version is not listed here.
// Declare the response as UTF-8 HTML. The charset parameter must not contain
// whitespace around "=" or a trailing space, otherwise it is malformed.
header("Content-Type: text/html; charset=utf-8");
/**
 * Look for a site in Google's result pages for a keyword and print its rank.
 *
 * Result pages are fetched one at a time with cURL (fixed browser User-Agent,
 * cookie jar file, randomly chosen outgoing interface alias), starting at
 * $page and stopping after page 10. The first result anchor whose HTML
 * contains $url_s is reported with echo. If no page yields a match, a
 * "not ranked within 10 pages" line is printed instead.
 *
 * @param string $url_s   Text searched for inside each matched result anchor.
 * @param string $keyword Search phrase; URL-encoded before being sent.
 * @param int    $page    1-based result page to start from.
 * @return bool True when a matching result was printed, false otherwise.
 */
function ggsearch($url_s, $keyword, $page = 1)
{
    $max_page = 10;
    $per_page = 10;

    // $keyword comes from user input and is echoed into an HTML response.
    $safe_keyword = htmlspecialchars($keyword, ENT_QUOTES, 'UTF-8');
    $cookie_file = dirname(__FILE__) . "/temp/google.txt"; // Store cookie values

    // NOTE(review): the HTML tags inside both patterns were stripped from the
    // pasted source (only "(.*)\s+<\!--z-->" and "]+>(.*?)" survived). They are
    // reconstructed here for the result markup Google served when this was
    // written; confirm against the live page before relying on them.
    $block_pattern = '!<div\s+id="search">(.*)</div>\s+<\!--z-->!';
    $title_pattern = '!<h3\s+class="r"><a[^>]+>(.*?)</a>!';

    // Walk the pages in a loop. The previous version recursed, but its
    // "not found" test was an assignment (`$rsState = false`), so the next
    // page was never requested.
    for ($current = max(1, (int) $page); $current <= $max_page; $current++) {
        $offset = ($current - 1) * $per_page;

        // Pick one of the eth0:1..eth0:4 aliases at random as the outgoing
        // address (original note: avoid Google blocking the IP).
        // Assumes these aliases exist on the host -- TODO confirm.
        $interface = "eth0:" . rand(1, 4);

        $url = "http://www.google.com/search?q=" . urlencode($keyword)
             . "&hl=en&prmd=imvns&ei=jpnjtvlfi8hlggexwdpm&start=" . $offset . "&sa=N";

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        // Alternative: forward the visitor's own $_SERVER['HTTP_USER_AGENT'].
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5");
        curl_setopt($ch, CURLOPT_INTERFACE, $interface); // specify the access IP address
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
        $contents = curl_exec($ch);
        $curl_error = ($contents === false) ? curl_error($ch) : '';
        curl_close($ch);

        if ($contents === false) {
            // Report the failed transfer instead of parsing it as an empty page.
            trigger_error("ggsearch: request for page $current failed: $curl_error", E_USER_WARNING);
            continue;
        }

        if (!preg_match_all($block_pattern, $contents, $blocks)) {
            continue; // no result container on this page
        }
        unset($contents);

        // foreach replaces list()/each(); each() was removed in PHP 8.
        foreach ($blocks[0] as $block) {
            if (!preg_match_all($title_pattern, $block, $title)) {
                continue;
            }
            foreach ($title[0] as $i => $anchor) {
                // $anchor is the whole matched tag, so the href is searched too.
                if (strpos($anchor, $url_s) !== false) {
                    $j = $i + 1;                              // position on this page
                    $sum = $j + ($current - 1) * $per_page;   // overall position
                    // NOTE(review): the markup in these strings was stripped
                    // from the pasted source; "<br />" and the link below are
                    // reconstructions -- confirm the intended output.
                    echo "keyword" . $safe_keyword . "<br />\n"
                       . "Ranking :" . $sum . "####" . "th " . $current . "page "
                       . "th " . $j . "Name " . $anchor . "<br />\n";
                    echo '<a href="' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '">'
                       . "click Search Results" . '</a>' . "<br />\n";
                    return true;
                }
            }
        }
    }

    echo 'keyword ' . $safe_keyword . ' This website is not ranked within 10 pages' . "<br />\n";
    return false;
}
/**
 * Split the raw keyword box into individual keywords.
 *
 * Keywords may be separated by line breaks or by "|". Both separators are
 * honoured at once (previously "|" overrode the newline split), surrounding
 * whitespace and the "\r" of CRLF input are removed, and empty entries are
 * dropped.
 *
 * @param string $raw Text as submitted by the form.
 * @return array List of non-empty, trimmed keywords.
 */
function gg_split_keywords($raw)
{
    $keywords = array();
    foreach (preg_split('/[\r\n|]+/', (string) $raw) as $piece) {
        $piece = trim($piece);
        if ($piece !== '') {
            $keywords[] = $piece;
        }
    }
    return $keywords;
}

// Form handler: runs only when the form was submitted.
if (!empty($_POST['submit'])) {
    // microtime(true) yields float seconds directly; the previous
    // explode('', microtime()) used an empty delimiter, which fails.
    $start = microtime(true);

    $more_key = isset($_POST['textarea']) ? trim($_POST['textarea']) : '';

    // NOTE(review): the pasted source reads the key 'URL', which may be a
    // translation artefact of 'url'; accept both until the form markup is
    // confirmed.
    if (isset($_POST['URL'])) {
        $url_s = trim($_POST['URL']);
    } elseif (isset($_POST['url'])) {
        $url_s = trim($_POST['url']);
    } else {
        $url_s = '';
    }

    if ($more_key !== '' && $url_s !== '') {
        // The former "www"/"http" normalisation computed $url with a misused
        // ltrim() character list, overwrote it, and never used it; ggsearch()
        // always received $url_s unchanged, so that dead code is removed.
        foreach (gg_split_keywords($more_key) as $keyword) {
            ggsearch($url_s, $keyword);
        }

        $end = microtime(true);
        echo 'Program Running time :';
        echo $end - $start;
        // die();
    }
}
?>
Capture rankings
From Shine's holy heaven-Min Chen 〃