PHP's cURL extension can simulate a wide variety of HTTP requests. It is the foundation for writing web crawlers in PHP and is also commonly used for calling API interfaces. At this point someone may ask: why not just use file_get_contents?
cURL performs better than file_get_contents, and it supports far more complex operations than simply fetching page data.
Here are some common functions.
curl_init
Initialize a cURL session
curl_setopt
Set a cURL transfer option
curl_exec
Execute request
curl_close
Close a cURL session
These four functions are the ones you will use most.
curl_errno returns the last error number; PHP defines constants for many of the error codes.
Let's go straight to the examples below; the explanations are in the code comments.
1. Download a web page from the network and replace "Baidu" in its content with "Dick Silk" before outputting it
<?php
/**
 * Example: download a web page from the network and print it with every
 * occurrence of "Baidu" replaced by "Dick Silk".
 */
$curlobj = curl_init();                                     // initialize a cURL session
curl_setopt($curlobj, CURLOPT_URL, "http://www.baidu.com"); // URL of the page to fetch
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);        // return the body from curl_exec() instead of printing it
$output = curl_exec($curlobj);                              // execute the request

if ($output === false) {
    // curl_exec() returns false on failure; report the error rather than
    // handing a boolean to str_replace().
    echo 'Curl error: ' . curl_error($curlobj);
} else {
    echo str_replace("Baidu", "Dick Silk", $output);
}

curl_close($curlobj);                                       // close the session
?>
2. Query the current weather in Beijing by calling a web service
<?php
/**
 * Example: query the current weather in Beijing by POSTing a form-encoded
 * request to a web service and printing the raw response.
 */

// Form-encoded request body. Stray spaces from the source text are removed and
// the parameter name uses the casing of the service's published method.
// NOTE(review): confirm whether the service expects the Chinese city name
// instead of "beijing".
$data = 'theCityName=beijing';

$curlobj = curl_init();
curl_setopt($curlobj, CURLOPT_URL, "http://www.webxml.com.cn/WebServices/WeatherWebService.asmx/getWeatherbyCityName");
curl_setopt($curlobj, CURLOPT_HEADER, 0);         // do not include response headers in the returned string
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, 1); // return the response from curl_exec() instead of printing it
curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line; the media type on
// its own is not a valid header.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, [
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
    "Content-Length: " . strlen($data),
]);

$rtn = curl_exec($curlobj);
if (!curl_errno($curlobj)) {
    // For debugging, curl_getinfo($curlobj) returns details about the transfer.
    echo $rtn;
} else {
    echo 'Curl error: ' . curl_error($curlobj);
}
curl_close($curlobj);
?>
3. Simulate logging in to a site that requires sign-in, then crawl the content of a page
<?php
/**
 * Example: log in to a site that requires authentication, then fetch a page
 * that is only served to the logged-in user.
 */

// Form-encoded login body. The source text also assigned an array to $data
// first, but it was overwritten immediately and never used, so it is dropped.
$data = 'username=zjzhoufy@126.com&password=1q2w3e&remember=1';

$curlobj = curl_init();                                                // initialize
curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/user/login"); // login URL
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);                   // do not print the response directly

// Cookie-related settings; these must be in place before the first request.
date_default_timezone_set('PRC');                   // the original example sets the time zone before using cookies
curl_setopt($curlobj, CURLOPT_COOKIESESSION, true); // start a fresh cookie session
// An empty file name turns on libcurl's in-memory cookie engine. Without it the
// session cookie from the login response is never sent with the second request.
curl_setopt($curlobj, CURLOPT_COOKIEFILE, '');
curl_setopt($curlobj, CURLOPT_HEADER, 0);
curl_setopt($curlobj, CURLOPT_FOLLOWLOCATION, 1);   // let cURL follow redirects

curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, [
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
    "Content-Length: " . strlen($data),
]);
curl_exec($curlobj);                                // perform the login

// Reuse the same handle (and its cookies) to fetch the protected page.
curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/space/index");
curl_setopt($curlobj, CURLOPT_HTTPGET, true);       // explicitly switch the reused handle back to GET
curl_setopt($curlobj, CURLOPT_HTTPHEADER, ["Content-Type: text/xml"]); // replaces the login headers
$output = curl_exec($curlobj);

if ($output === false) {
    echo 'Curl error: ' . curl_error($curlobj);
} else {
    echo $output;
}
curl_close($curlobj);                               // close
?>
4. Log in to a website, then crawl and download the personal space page, following redirects with a custom implementation
<?php
/**
 * Example: log in to a website, then crawl and download the personal space
 * page, following redirects with a custom implementation instead of
 * CURLOPT_FOLLOWLOCATION.
 */
$data = 'username=demo_peter@126.com&password=123qwe&remember=1';

$curlobj = curl_init();                                                // initialize
curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/user/login"); // login URL
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);                   // do not print the response directly

// Cookie-related settings; these must be in place before the first request.
date_default_timezone_set('PRC');                   // the original example sets the time zone before using cookies
curl_setopt($curlobj, CURLOPT_COOKIESESSION, true); // start a fresh cookie session
// An empty file name turns on libcurl's in-memory cookie engine so the login
// cookie is sent with the following requests.
curl_setopt($curlobj, CURLOPT_COOKIEFILE, '');
curl_setopt($curlobj, CURLOPT_HEADER, 0);
// CURLOPT_FOLLOWLOCATION is deliberately not used: per the original author it
// requires safe mode and open_basedir to be turned off, which is bad for
// server security. Redirects are followed by curl_redir_exec() instead.

curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, [
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
    "Content-Length: " . strlen($data),
]);
curl_exec($curlobj);                                // perform the login

// Reuse the same handle (and its cookies) to fetch the protected page.
curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/space/index");
curl_setopt($curlobj, CURLOPT_HTTPGET, true);       // explicitly switch the reused handle back to GET
curl_setopt($curlobj, CURLOPT_HTTPHEADER, ["Content-Type: text/xml"]); // replaces the login headers
$output = curl_redir_exec($curlobj);
curl_close($curlobj);                               // close
echo $output;

/**
 * Execute the request on $ch and follow HTTP redirects manually.
 *
 * Response headers are enabled on the handle so the Location header can be
 * read. When the status is a redirect, the handle's URL is updated and the
 * function calls itself again, up to 20 consecutive times.
 *
 * @param resource|CurlHandle $ch    An initialized cURL handle. CURLOPT_HEADER and
 *                                   CURLOPT_RETURNTRANSFER are switched on as a side effect.
 * @param string              $debug Unused; kept so existing callers keep working.
 *
 * @return string|false The body of the final response (or of the redirect response when
 *                      its Location cannot be used); false when the redirect limit is
 *                      reached or the transfer fails.
 */
function curl_redir_exec($ch, $debug = "")
{
    static $curl_loops = 0;
    static $curl_max_loops = 20;

    if ($curl_loops++ >= $curl_max_loops) {
        $curl_loops = 0;
        return false;
    }

    curl_setopt($ch, CURLOPT_HEADER, true);         // headers are needed to read the redirect target
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $response = curl_exec($ch);
    if ($response === false) {
        // Transfer failed: there is nothing to split or follow.
        $curl_loops = 0;
        return false;
    }

    // Split the raw response. The body starts exactly at the header size;
    // starting one byte earlier would prepend the last header newline.
    $h_len  = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $header = substr($response, 0, $h_len);
    $data   = (string) substr($response, $h_len);

    $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    if (!in_array($http_code, [301, 302, 303, 307, 308], true)) {
        $curl_loops = 0;
        return $data;
    }

    // Header names are case-insensitive, so match "Location" in any casing.
    $matches = [];
    if (!preg_match('/^Location:[ \t]*(.*?)[ \t\r]*$/mi', $header, $matches) || $matches[1] === '') {
        // Redirect status without a usable Location header.
        $curl_loops = 0;
        return $data;
    }

    $url = parse_url($matches[1]);
    if (!$url) {
        // Couldn't process the URL to redirect to.
        $curl_loops = 0;
        return $data;
    }

    // Fill in whatever a relative Location omits from the URL just requested.
    $last_url = parse_url(curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
    if (!isset($url['scheme'])) {
        $url['scheme'] = $last_url['scheme'];
    }
    if (!isset($url['host'])) {
        $url['host'] = $last_url['host'];
    }
    if (!isset($url['path'])) {
        $url['path'] = isset($last_url['path']) ? $last_url['path'] : '/';
    }

    $new_url = $url['scheme'] . '://' . $url['host'] . $url['path']
        . (isset($url['query']) ? '?' . $url['query'] : '');
    curl_setopt($ch, CURLOPT_URL, $new_url);

    return curl_redir_exec($ch);
}
?>
5. Upload a local file to an FTP server
<?php
/**
 * Example: upload a local file to an FTP server.
 */
$curlobj = curl_init();

$localfile = 'ftp01.php';
$fp = fopen($localfile, 'rb'); // read-only, binary-safe
if ($fp === false) {
    // Without a readable source file there is nothing to upload.
    exit("Cannot open {$localfile} for reading.");
}

curl_setopt($curlobj, CURLOPT_URL, "ftp://192.168.1.100/ftp01_uploaded.php");
curl_setopt($curlobj, CURLOPT_HEADER, 0);
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curlobj, CURLOPT_TIMEOUT, 300);                  // time out after 300 s
curl_setopt($curlobj, CURLOPT_USERPWD, "peter.zhou:123456");  // FTP "user:password"

// Upload and download differ mainly in the following three options.
curl_setopt($curlobj, CURLOPT_UPLOAD, 1);
curl_setopt($curlobj, CURLOPT_INFILE, $fp);
curl_setopt($curlobj, CURLOPT_INFILESIZE, filesize($localfile));

$rtn = curl_exec($curlobj);
fclose($fp);

if (!curl_errno($curlobj)) {
    echo "uploaded successfully.";
} else {
    echo 'curl error: ' . curl_error($curlobj);
}
curl_close($curlobj);
?>
6. Download an HTTPS resource from the network
<?php
/**
 * Example: download an HTTPS resource from the network.
 */
$curlobj = curl_init();                               // initialize
curl_setopt($curlobj, CURLOPT_URL, "https://ajax.aspnetcdn.com/ajax/jquery.validate/1.12.0/jquery.validate.js"); // URL to fetch
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true);  // do not print the response directly

// HTTPS settings.
date_default_timezone_set('PRC');                     // kept from the original example; no cookies are used here
// Verify the server certificate against trusted CAs. Turning this off makes
// the transfer open to man-in-the-middle attacks; if verification fails because
// no CA bundle is configured, point CURLOPT_CAINFO at one instead.
curl_setopt($curlobj, CURLOPT_SSL_VERIFYPEER, true);
// Require the certificate's name to match the host being requested.
curl_setopt($curlobj, CURLOPT_SSL_VERIFYHOST, 2);

$output = curl_exec($curlobj);                        // execute
if ($output === false) {
    echo 'Curl error: ' . curl_error($curlobj);
} else {
    echo $output;
}
curl_close($curlobj);                                 // close
?>