<?php
// Collect HTML: download the raw markup of a page.
/**
 * Download the body of a URL using cURL.
 *
 * Redirects are followed, and the connection attempt is abandoned after
 * 10 seconds.
 *
 * @param string $url Address of the page to fetch.
 * @return string Response body with leading/trailing whitespace removed,
 *                or an empty string when the transfer fails.
 */
function getwebcontent($url) {
    $ch = curl_init();
    $timeout = 10; // seconds allowed for establishing the connection

    curl_setopt($ch, CURLOPT_URL, $url);
    // Return the response as a string instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $response = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on failure; report that as an empty body
    // explicitly rather than relying on trim() to coerce the boolean.
    if ($response === false) {
        return '';
    }

    return trim($response);
}
// Obtain the title and URL of every article on the listing page.
// NOTE(review): "***" is a redacted host name in the source text; substitute
// the real site before running.
$string = getwebcontent('http://www.***.com/learn/zhunbeihuaiyun/jijibeiyun/2');

// Regular-expression match: capture 1 is the article path (appended to the
// article base URL below), capture 2 is the link text used as the title.
// NOTE(review): the markup inside this pattern was stripped from the source
// text; it is reconstructed from how the two captures are used. Confirm it
// against the real listing page.
preg_match_all(
    '#<a href="/learn/article/([^"]+)"[^>]*>(.*?)</a>#i',
    $string,
    $out,
    PREG_SET_ORDER
);

// Initialise every list so the later loops are safe when nothing matched.
$article = array('title' => array(), 'link' => array(), 'content' => array());
foreach ($out as $match) {
    $article['title'][] = $match[2];
    $article['link'][]  = 'http://www.***.com/learn/article/' . $match[1];
}

// Based on the URL, obtain each article's content.
foreach ($article['link'] as $key => $link) {
    $content_html = getwebcontent($link);

    // TODO(review): the opening tag of the content container was lost from the
    // source text (only "[\s|\S]*?</div>" survived). This placeholder grabs the
    // first <div>...</div>; narrow it to the real container (class or id).
    if (preg_match('/<div[^>]*>[\s\S]*?<\/div>/i', $content_html, $matches)) {
        $article['content'][$key] = $matches[0];
    } else {
        // Download failed or the container was not found: keep the index
        // aligned with the title list instead of reading a missing $matches[0].
        $article['content'][$key] = '';
    }
}

// The file cannot be saved without transcoding the title used as its name.
foreach ($article['title'] as $key => $value) {
    // Titles come from a remote page: replace path separators and other
    // characters that are invalid in file names. This is done on the UTF-8
    // text, before conversion, because GBK trail bytes can collide with
    // ASCII punctuation such as "\" and "|".
    $safe = preg_replace('#[\\\\/:*?"<>|]#', '_', $value);

    $converted = iconv('utf-8', 'gbk', $safe); // transcoding
    // iconv() returns false when the text cannot be converted; fall back to
    // the sanitised UTF-8 title rather than producing an empty file name.
    $article['title'][$key] = ($converted === false) ? $safe : $converted;
}

// Save each article's content to its own file.
// NOTE(review): the file-name expression was garbled in the source text;
// "<title>.txt" is inferred from the title transcoding step above.
$num = count($article['title']);
for ($i = 0; $i < $num; $i++) {
    $path = $article['title'][$i] . '.txt';
    if (file_put_contents($path, $article['content'][$i]) === false) {
        error_log('Could not write article file: ' . $path);
    }
}
?>