<?php
// The code is as follows:
/*
Author: ssh_kobe
Date: 20110602
Limitation: images whose path in the page is not an absolute URL cannot be crawled.
*/
// Entry point: lift the execution time limit (crawling can take a while),
// then download every image found on the start page.
set_time_limit(0);
$URL = 'http://pp.baidu.com/'; // any page URL
Get_pic($URL);
/**
 * Download every image referenced by an <img> tag on a page.
 *
 * Fetches the page with Curlget(), collects the src attribute of each <img>
 * tag whose URL ends in a known image extension, and hands the list to
 * get_name() for saving. Image URLs are used exactly as they appear in the
 * page, so only absolute URLs can be fetched.
 *
 * @param string $pic_url URL of the page to scan for images.
 * @return int Always 0.
 */
function Get_pic($pic_url)
{
    // Fetch the page markup.
    $data = Curlget($pic_url);

    if (is_string($data) && $data !== '') {
        // Capture group 1 is the src value; quotes may be single or double.
        // The extension list mirrors the types get_name() knows how to save.
        $pattern_src = '/<img\b[^>]*?\ssrc\s*=\s*["\']([^"\'>]+?\.(?:jpe?g|gif|png|bmp))["\'][^>]*>/i';
        if (preg_match_all($pattern_src, $data, $match_src) > 0) {
            get_name($match_src[1]);
        }
    }
    // A failed fetch (Curlget() returned false) simply yields no images.

    echo "\nFinished!!! ";
    return 0;
}
/**
 * Download each image URL and save it to disk under a unique file name.
 *
 * The file name is the value of Get_unique() followed by the image
 * extension taken from the URL path. It is a relative name, so the file is
 * created relative to the current working directory.
 *
 * Prints "[ok]..!" for every saved image, "[skip]..!" for entries without a
 * recognised image extension and "[fail]..!" when the download or the write
 * fails.
 *
 * @param array $pic_arr List of image URLs.
 * @return int Always 0.
 */
function get_name($pic_arr)
{
    // Image types that will be saved.
    $allowed_types = array('jpg', 'jpeg', 'bmp', 'gif', 'png');

    foreach ($pic_arr as $pic_item) {
        // Read the extension from the URL path so a query string or fragment
        // after the file name does not get in the way.
        $path = is_string($pic_item) ? parse_url($pic_item, PHP_URL_PATH) : null;
        $type = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
        if (!in_array($type, $allowed_types, true)) {
            echo "[skip]..!";
            continue;
        }

        // Download first so a failed request does not leave an empty file behind.
        $content = Curlget($pic_item);
        if (!is_string($content) || $content === '') {
            echo "[fail]..!";
            continue;
        }

        // Name the file after the current microsecond timestamp.
        $pic_name = Get_unique() . '.' . $type;
        if (file_put_contents($pic_name, $content) === false) {
            echo "[fail]..!";
            continue;
        }

        echo "[ok]..!";
    }

    return 0;
}
/**
 * Build an identifier from the current time with microsecond resolution.
 *
 * @return string Unix timestamp in seconds followed by the six-digit,
 *                zero-padded microsecond part.
 */
function Get_unique()
{
    // microtime() returns "0.uuuuuu00 ssssssssss": the fraction first, then the seconds.
    list($msec, $sec) = explode(' ', microtime());

    // Take the six microsecond digits straight from the string: this keeps
    // leading zeros and avoids float rounding.
    return $sec . substr($msec, 2, 6);
}
/**
 * Fetch the content of a URL with cURL.
 *
 * Surrounding whitespace is trimmed from the URL and HTML-escaped ampersands
 * ("&amp;") are turned back into "&" before the request. The URL is also
 * sent as the Referer, cookies are read from and written to cookie.txt, and
 * redirects are not followed.
 *
 * @param string $url URL to request.
 * @return string|false Response body, or false when the request fails.
 */
function Curlget($url)
{
    $url = str_replace('&amp;', '&', trim($url));

    $curl = curl_init();
    if ($curl === false) {
        return false;
    }

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, false);
    curl_setopt($curl, CURLOPT_REFERER, $url);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; SeaPort/1.2; Windows NT 5.1; SV1; InfoPath.2)");
    curl_setopt($curl, CURLOPT_COOKIEJAR, 'cookie.txt');
    curl_setopt($curl, CURLOPT_COOKIEFILE, 'cookie.txt');
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 0);

    $values = curl_exec($curl);
    curl_close($curl);

    return $values;
}
?>
http://www.bkjia.com/PHPjc/324220.html www.bkjia.com true http://www.bkjia.com/PHPjc/324220.html TechArticle Copy the code as follows: PHP /* Author: ssh_kobe Date: 20110602 Limitation: if the picture path in the page is not an absolute path, it cannot be crawled */ set_time_limit(0); // crawling is not limited by time ...