For everyone who wants to learn regular expressions and how to collect (scrape) pages with them. The PHP function get_url_content($Url, $Method = 'c') first brings in the required language encoding (if none is set, the default is UTF-8, so there is no need to worry about it) through "global $Charset;", then calls "$Urlarr = parse_url($Url);" and checks whether a host can be detected...
<?php
/**
 * Fetch a remote page over HTTP and return its body converted to the
 * configured output charset.
 *
 * The request carries a Referer pointing at the target site itself and, when
 * the SAPI exposes them, the headers of the current incoming request (minus
 * the ones that must not be replayed against another host).
 *
 * Reads the global $Charset as the target encoding; when it is empty the
 * target is UTF-8. The source encoding is taken from the first "charset="
 * declaration found in the page and falls back to GBK when none is found.
 *
 * @param string $Url    Absolute URL to fetch. It must contain a host.
 * @param string $Method Transport: 'c' uses cURL (default), 'f' uses
 *                       file_get_contents() with an HTTP stream context.
 *
 * @return string|false The converted page body, or false when the URL has no
 *                      host, the requested transport is unknown/unavailable,
 *                      or the fetch itself failed.
 */
function get_url_content($Url, $Method = 'c')
{
    // Target encoding is configured by the including script.
    global $Charset;

    $parts = parse_url($Url);
    if (!is_array($parts) || !isset($parts['host'])) {
        // Without a host there is nothing to connect to.
        return false;
    }

    $use_stream = ($Method === 'f' && function_exists('file_get_contents'));
    $use_curl   = ($Method === 'c' && function_exists('curl_init'));
    if (!$use_stream && !$use_curl) {
        // Unknown or unavailable transport: fail explicitly instead of
        // post-processing an undefined variable.
        return false;
    }

    $scheme  = isset($parts['scheme']) ? $parts['scheme'] : 'http';
    $headers = _get_url_content_request_headers($scheme . '://' . $parts['host']);

    if ($use_stream) {
        $context = stream_context_create(array(
            'http' => array(
                'method' => 'GET',
                'header' => $headers,
            ),
        ));
        // Warnings are suppressed on purpose; failure is reported via false.
        $body = @file_get_contents($Url, false, $context);
    } else {
        $handle = curl_init();
        curl_setopt($handle, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($handle, CURLOPT_URL, $Url);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5);
        $body = curl_exec($handle);
        curl_close($handle);
    }

    if (!is_string($body)) {
        return false;
    }

    // NOTE(review): the arguments of the original str_replace() call were
    // lost when this listing was extracted (only `str_replace(,"", ...)`
    // survived). Based on the accompanying comment ("to make the style
    // display nicely") this is reconstructed as injecting a <base href> so
    // relative stylesheet/image URLs resolve against the fetched page.
    // Confirm against the original post.
    if (stripos($body, '<base') === false) {
        $base_tag = '<base href="' . htmlspecialchars($Url, ENT_QUOTES) . '">';
        $patched  = preg_replace_callback(
            '/<head(?:\s[^>]*)?>/i',
            function ($match) use ($base_tag) {
                return $match[0] . $base_tag;
            },
            $body,
            1
        );
        if (is_string($patched)) {
            $body = $patched;
        }
    }

    // Detect the page's declared encoding. Optional quotes are allowed so
    // both `content="text/html; charset=gbk"` and `<meta charset="utf-8">`
    // yield a non-empty name.
    $source_charset = 'GBK';
    if (preg_match('/charset\s*=\s*["\']?\s*([a-z0-9][a-z0-9._:-]*)/i', $body, $found)) {
        $source_charset = $found[1];
    }
    $target_charset = empty($Charset) ? 'UTF-8' : $Charset;

    return _get_url_content_convert($body, $target_charset, $source_charset);
}

/**
 * Build the list of request header lines sent to the target site.
 *
 * @param string $referer Value for the Referer header (scheme://host).
 *
 * @return array One "Name: value" string per header.
 */
function _get_url_content_request_headers($referer)
{
    // Headers that describe the incoming connection/body, or that carry
    // credentials for THIS site, must not be replayed against another host.
    // Host is left to the HTTP client so that a non-default port is kept.
    $not_forwarded = array(
        'host', 'referer', 'cookie', 'authorization', 'connection',
        'content-length', 'content-type', 'accept-encoding',
    );

    $lines = array('Referer: ' . $referer);

    // getallheaders() does not exist under every SAPI (e.g. the CLI).
    $incoming = function_exists('getallheaders') ? getallheaders() : array();
    if (is_array($incoming)) {
        foreach ($incoming as $name => $value) {
            if (in_array(strtolower($name), $not_forwarded, true)) {
                continue;
            }
            // Strip CR/LF so a crafted value cannot inject extra headers.
            $lines[] = $name . ': ' . str_replace(array("\r", "\n"), '', (string) $value);
        }
    }

    return $lines;
}

/**
 * Convert $body from $from to $to, returning it unchanged when mbstring is
 * missing or the conversion is rejected (for example an unknown charset name).
 *
 * @param string $body Text to convert.
 * @param string $to   Target encoding name.
 * @param string $from Source encoding name.
 *
 * @return string
 */
function _get_url_content_convert($body, $to, $from)
{
    if (!function_exists('mb_convert_encoding')) {
        return $body;
    }

    try {
        // PHP 7 warns and returns false for an unknown encoding; PHP 8 throws.
        $converted = @mb_convert_encoding($body, $to, $from);
    } catch (\ValueError $error) {
        $converted = false;
    }

    return is_string($converted) ? $converted : $body;
}
// Demo: fetch a portal page with the file_get_contents() transport, pull the
// absolute links out of it, then fetch one of those links with the default
// cURL transport and print the result.
header('Content-Type: text/html; charset=UTF-8');

// http://www.xtzj.com/read-htm-tid-347550.html is noted by the original
// author as a page that is not collected.
$file = get_url_content('http://www.hao123.com', 'f');
if ($file === false) {
    exit('Could not fetch the index page.');
}

// NOTE(review): the allow-list passed to strip_tags() and the tag part of the
// link pattern were lost when this listing was extracted. They are
// reconstructed here as "keep only <a> tags" and "capture the href of each
// anchor" -- confirm against the original post.
$file = strip_tags($file, '<a>');
preg_match_all('/<a\s[^>]*href\s*=\s*["\']?(https?:[^"\'<>\s]*)/i', $file, $matches);
$link = $matches[1];

// Pick which collected link to fetch; change the index as needed (the
// original author saw indexes 0-151). The cURL transport is used below.
$x = 10;
if (!isset($link[$x])) {
    exit('No link found at index ' . $x . '.');
}

$file = get_url_content($link[$x]);
if ($file === false) {
    exit('Could not fetch the selected link.');
}
echo $file;
?>
Instructions and notes are included in the code comments.
If you have any questions, feel free to ask; I would be glad to exchange more information.
Original address: http://bbs.phpchina.com/viewthread.php?tid=99263
Source: http://www.bkjia.com/PHPjc/486604.html (www.bkjia.com, TechArticle). Excerpt (PHP): function get_url_content($Url, $Method = 'c') { // Introduce the required language encoding; if it is not set, the default is UTF-8, so there is no need to worry. global $Charset; $Urlarr = parse_url($Url); // If you check...