<?php
/**
 * Collect news articles linked from the csdn.net front page and print them.
 *
 * Downloads the front page, extracts every unique article URL of the form
 * http://<subdomain>.csdn.net/a/<digits>.html, downloads each article and
 * pulls its title and body out with a regular expression. The collected
 * articles are gathered in $list, a list of
 * array('content' => ..., 'title' => ...) rows, and dumped with print_r().
 *
 * Pages that cannot be downloaded, or whose content cannot be extracted,
 * are skipped.
 *
 * @return void
 */
function csdn()
{
    $url = 'http://www.csdn.net';

    // NOTE(review): the published snippet lost the HTML tags inside this
    // pattern; only "(.*)<\/h1>" and the "|" alternation survived, and no
    // modifiers are visible. Group 1 (title) is reconstructed as an <h1>
    // element. Group 2 (content) is a PLACEHOLDER container -- confirm it
    // against the real article markup before relying on it.
    $articlePattern = '/<h1[^>]*>(.*)<\/h1>|<div class="news_content">(.*)<\/div>/';

    // Initialised up front so print_r() below never sees an undefined variable.
    $list = array();

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    // CURLOPT_ENCODING sets the Accept-Encoding header (content codings such
    // as gzip/deflate), not a character set; '' advertises every coding the
    // local libcurl supports and enables transparent decoding.
    curl_setopt($ch, CURLOPT_ENCODING, '');

    $content = curl_exec($ch);
    if (!is_string($content)) {
        // curl_exec() returns false on failure; treat it as an empty page.
        $content = '';
    }

    // Every article link on the front page, de-duplicated.
    preg_match_all('#http://\w*\.csdn\.net/a/\d*\.html#', $content, $match);
    $weburl = array_unique($match[0]);

    foreach ($weburl as $vo) {
        // Reuse the same handle (and its options) for each article.
        curl_setopt($ch, CURLOPT_URL, $vo);
        $content = curl_exec($ch);
        if (!is_string($content)) {
            continue;
        }

        preg_match_all($articlePattern, $content, $match);

        // The pattern is an alternation: the first overall match is expected
        // to be the title (group 1, index 0) and the second the body
        // (group 2, index 1). Articles without a body are skipped.
        if (!empty($match[2][1])) {
            $list[] = array(
                'content' => $match[2][1],
                'title'   => $match[1][0],
            );
        }
    }

    curl_close($ch);

    print_r($list);
}
?>
As you can see, $list holds the collected news as a two-dimensional array.
Saving it to your own database is straightforward, so I will not explain that here.
To check whether a collected article is already in your database, hash its title with md5 and compare it with the md5 hashes of the titles already stored. If they match, your database already contains the same article.
Note: please do not copy the code by selecting the text shown on this page; copy it from the page source instead, because the code as displayed may differ from the actual code.
If you use ThinkPHP (www.2cto.com), as I do, the more convenient version below inserts the articles directly into the database and includes the duplicate check:
/**
 * Collect news articles from csdn.net and insert the new ones into the
 * "news" table (ThinkPHP model M('news')).
 *
 * Downloads the front page, extracts every unique article URL of the form
 * http://<subdomain>.csdn.net/a/<digits>.html, downloads each article and
 * pulls its title and body out with a regular expression. Each collected
 * article whose title is not already stored for the given $uid is inserted
 * with uid, Date, author and iscommand filled in.
 *
 * Pages that cannot be downloaded, or whose content cannot be extracted,
 * are skipped.
 *
 * @param mixed $uid Category of the collected documents; used to load the
 *                   existing rows and stored on every inserted row.
 * @return void
 */
function csdn($uid)
{
    $url = 'http://www.csdn.net';

    // NOTE(review): the published snippet lost the HTML tags inside this
    // pattern; only "(.*)<\/h1>" and the "|" alternation survived, and no
    // modifiers are visible. Group 1 (title) is reconstructed as an <h1>
    // element. Group 2 (content) is a PLACEHOLDER container -- confirm it
    // against the real article markup before relying on it.
    $articlePattern = '/<h1[^>]*>(.*)<\/h1>|<div class="news_content">(.*)<\/div>/';

    // Initialised up front so the insert loop is safe when nothing is found.
    $list = array();

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    // CURLOPT_ENCODING sets the Accept-Encoding header (content codings such
    // as gzip/deflate), not a character set; '' advertises every coding the
    // local libcurl supports and enables transparent decoding.
    curl_setopt($ch, CURLOPT_ENCODING, '');

    $content = curl_exec($ch);
    if (!is_string($content)) {
        // curl_exec() returns false on failure; treat it as an empty page.
        $content = '';
    }

    // Every article link on the front page, de-duplicated.
    preg_match_all('#http://\w*\.csdn\.net/a/\d*\.html#', $content, $match);
    $weburl = array_unique($match[0]);

    foreach ($weburl as $vo) {
        // Reuse the same handle (and its options) for each article.
        curl_setopt($ch, CURLOPT_URL, $vo);
        $content = curl_exec($ch);
        if (!is_string($content)) {
            continue;
        }

        preg_match_all($articlePattern, $content, $match);

        // The pattern is an alternation: the first overall match is expected
        // to be the title (group 1, index 0) and the second the body
        // (group 2, index 1). Articles without a body are skipped.
        if (!empty($match[2][1])) {
            $list[] = array(
                'content' => $match[2][1],
                'title'   => $match[1][0],
            );
        }
    }

    curl_close($ch);

    $db = M('news');

    // Array condition instead of string concatenation so $uid is never
    // spliced into the SQL text.
    $news = $db->where(array('uid' => $uid))->select();
    if (!is_array($news)) {
        // NOTE(review): presumably select() yields a non-array when there
        // are no rows or the query fails -- guard so the loop below is safe.
        $news = array();
    }

    // Set of md5(title) for the rows already stored; the hash is used as an
    // array key, which avoids both the nested scan and loose "==" comparison
    // of hex strings.
    $known = array();
    foreach ($news as $value) {
        $known[md5($value['title'])] = true;
    }

    foreach ($list as $vo) {
        $hash = md5($vo['title']);
        if (isset($known[$hash])) {
            continue; // an article with this title is already stored
        }
        // Remember it so the same title collected twice in one run is only
        // inserted once.
        $known[$hash] = true;

        $vo['uid'] = $uid;
        // NOTE(review): the key 'Date' is kept exactly as published; confirm
        // it matches the column name's case.
        $vo['Date'] = date('Y-m-d H:i:s');
        $vo['author'] = Session::get('admin');
        $vo['iscommand'] = 1;
        $db->add($vo);
    }
}
?>
From zouhao619's column