PHP: a function for getting a webpage's title and content _ PHP Tutorial
Last Update:2017-05-13
Source: Internet
Author: User
A PHP function for retrieving a webpage's title and content. Sometimes we need to get the title and content of a webpage, which is a typical job for a scraping (content-collection) function. Here we share the code to help you do that easily: functiongetPageContent ($ url) {$ urlhttp: Sometimes we need to obtain the title and content of a webpage, which is a typical job for a scraping (content-collection) function. Here we share the code with you so that you can easily find what you need: function getPageContent ($ url) {// $ url =' http://www.ttphp.com ; $ Pageinfo = array (); $ pageinfo [content_type] = ''; $ pageinfo [charset] =''; $ pageinfo [title] = ''; $ pageinfo [description] = ''; $ pageinfo [keywords] =''; $ pageinfo [body] = ''; $ pageinfo ['httpcode'] = 200; $ pageinfo ['all'] = ''; $ ch = curl_init (); curl_setopt ($ ch, CURLOPT_USERAGENT," Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) "); curl_setopt ($ ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt ($ ch, Iterator, 0); curl_setopt ($ ch, iterator, 0); curl_setopt ($ ch, CURLOPT_TIMEOUT, 8); curl_setopt ($ ch, CURLOPT_FILETIME, 1); curl_setopt ($ ch, CURLOPT_FOLLOWLOCATION, 1); // curl_setopt ($ ch, CURLOPT_HEADER, 1); curl_setopt ($ ch, CURLOPT_URL, $ url); $ curl_start = microtime (true ); $ store = curl_exec ($ ch); $ curl_time = microtime (true)-$ curl_start; if (curl_error ($ ch) {$ page Info ['httpcode'] = 505; // gate way error echo 'curl error :'. curl_error ($ ch ). "/n"; return $ pageinfo;} // print_r (curl_getinfo ($ ch); $ pageinfo ['httpcode'] = curl_getinfo ($ ch, CURLINFO_HTTP_CODE ); // echo curl_getinfo ($ ch, CURLINFO_CONTENT_TYPE ). "/n"; $ pageinfo [content_type] = curl_getinfo ($ ch, CURLINFO_CONTENT_TYPE); if (intval ($ pageinfo ['httpcode']) <> 200 or! 
Preg_match ('@ text/html @', curl_getinfo ($ ch, CURLINFO_CONTENT_TYPE) {// print_r (curl_getinfo ($ ch); // exit; return $ pageinfo ;} preg_match ('/charset = ([^/s/n/r] +)/I', curl_getinfo ($ ch, CURLINFO_CONTENT_TYPE), $ matches ); // Obtain charset if (trim ($ matches [1]) from the header) {$ pageinfo [charset] = trim ($ matches [1]);} // echo $ pageinfo [charset]; // exit; curl_close ($ ch); // echo $ store; // remove javascript $ store = preg_replace ("/ $ Store = preg_replace ("/ /SmUi ",'', $ store); // remove