<?php
/*
 * Cowboy IIS log spider Crawl Record Analyzer V1.1 (PHP GB2312 version)
 * Author: Cowboy  qq:172379201  Email:17gd@163.com
 */
//===================================================
header('content-type:text/html; charset=gb2312');

// Site log directory. The account PHP runs under must have read permission on it.
// If the logs were downloaded for local analysis, change the site URL ($domain)
// used by Show() to your own site; this is optional and does not affect the
// analysis results.
// If this file is renamed, replace every "iis.php" in the code with the new
// file name, otherwise the generated links will not work.
$folder = 'd:/vhost/webroot/jooker82465/www/wordpress/uploads/w3svc87164023/'; // Must end with a slash!
$pagesize = 50; // Number of records shown per page.

//=========================
// Spiders that can be analysed: user-agent token => label shown in the index table.
$spiders = [
    'Baiduspider' => 'Baidu',
    'Googlebot' => 'Google (google)',
    'Yahoo' => 'Yahoo',
    'Yodaobot' => 'Youdao (Yodao)',
    'Sosospider' => 'Search (Soso)',
    'Sogou' => 'Sogou (Sogou)',
    'MSNBot' => 'Microsoft (MSN)',
];

// Request parameters. "type" arrives base64-encoded; "showfile" is reduced to a
// bare file name so a request cannot point outside the log directory.
$type = '';
if (isset($_GET['type']) && is_string($_GET['type'])) {
    $type = (string) base64_decode($_GET['type']);
}
$showfile = '';
if (isset($_GET['showfile']) && is_string($_GET['showfile'])) {
    $showfile = basename($_GET['showfile']);
}
$page = (isset($_GET['page']) && is_scalar($_GET['page'])) ? (int) $_GET['page'] : 1;
if ($page < 1) {
    $page = 1;
}

//============================ Open directory
if ($type === '') {
    $dir = is_dir($folder) ? opendir($folder) : false;
    if ($dir === false) {
        echo 'This log directory does not exist or insufficient permissions, please check the settings!';
        exit();
    }

    // Collect the log files, skipping "." / ".." and sub-directories.
    $files = [];
    while (($file = readdir($dir)) !== false) {
        if ($file !== '.' && $file !== '..' && is_file($folder . $file)) {
            $files[] = $file;
        }
    }
    closedir($dir);
    rsort($files); // readdir() order is unspecified; list in descending name order.

    // One table row per log file: creation date followed by one link per spider.
    $indexstr = '';
    foreach ($files as $file) {
        $ctime = filectime($folder . $file);
        $indexstr .= '<tr><td height="25" width="10%">'
            . ($ctime === false ? '-' : date('Y-m-d', $ctime))
            . '</td>';
        foreach ($spiders as $agent => $label) {
            $href = 'iis.php?type=' . rawurlencode(base64_encode($agent))
                . '&amp;showfile=' . rawurlencode($file);
            $indexstr .= '<td height="25" width="10%" align="center"><a href="'
                . $href . '">' . $label . '</a></td>';
        }
        $indexstr .= "</tr>\n";
    }

    // Fill the static block first so file-name content is never rescanned for placeholders.
    $html = indexhtml();
    $html = str_replace('[Copy]', Mycopy(), $html);
    $html = str_replace('[Showlog]', $indexstr, $html);
    echo $html;
} elseif (isset($spiders[$type])) {
    // Every supported spider is rendered by the same report function.
    echo Show($type, $folder, $showfile, $page, $pagesize);
}
/**
 * Escape a value for output inside the GB2312 HTML page.
 */
function iis_escape($text)
{
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'GB2312');
}

/**
 * Convert one log line from UTF-8 to GB2312, the charset the pages are served in.
 *
 * Pure-ASCII lines are returned untouched (they are identical in both
 * charsets); if the conversion fails the original line is kept.
 */
function iis_to_gb2312($line)
{
    if (!function_exists('iconv') || !preg_match('/[\x80-\xff]/', $line)) {
        return $line;
    }
    $converted = iconv('UTF-8', 'GB2312//IGNORE', $line);

    return $converted === false ? $line : $converted;
}

/**
 * Replace every [name] placeholder of a template in a single pass.
 *
 * Placeholder names are matched case-insensitively against the lower-case
 * keys of $values; unknown placeholders are left as they are. Because the
 * template is scanned only once, inserted values are never searched for
 * placeholders themselves.
 */
function iis_fill_template($template, $values)
{
    $filled = preg_replace_callback(
        '/\[([a-z]+)\]/i',
        function ($match) use ($values) {
            $key = strtolower($match[1]);

            return array_key_exists($key, $values) ? (string) $values[$key] : $match[0];
        },
        $template
    );

    return $filled === null ? $template : $filled;
}

/**
 * Build the crawl report page for one spider from one IIS log file.
 *
 * Reads $folder . $showfile line by line, keeps every line that contains the
 * spider token $type, tallies the 200 / 304 / 404 responses and renders one
 * page of the matching records (last log line first) into the pagehtml()
 * template. Fields are taken by position from the space-separated log line:
 * 0 and 1 form the time column, 9 the spider IP, 5 the crawled URL and 11 the
 * status code.
 *
 * @param string $type     Spider token to search for, e.g. "Baiduspider".
 * @param string $folder   Log directory, including the trailing slash.
 * @param string $showfile Log file name inside $folder; any directory part is discarded.
 * @param int    $page     1-based page number; out-of-range values are clamped.
 * @param int    $pagesize Number of records per page.
 * @param string $domain   Site URL prefixed to crawled paths (no trailing slash).
 *
 * @return string|null The rendered HTML, or an error message when the log file
 *                     cannot be read; null when $type, $folder or $showfile is empty.
 */
function Show($type, $folder, $showfile, $page, $pagesize, $domain = 'http://tarr.cn')
{
    if (!$type || !$folder || !$showfile) {
        return null;
    }

    $titles = [
        'Baiduspider' => 'Baidu',
        'Googlebot' => 'Google',
        'Yahoo' => 'Yahoo',
        'Yodaobot' => 'Youdao',
        'Sosospider' => 'search',
        'Sogou' => 'Sogou',
        'MSNBot' => 'MSN',
    ];
    $title = isset($titles[$type]) ? $titles[$type] : iis_escape($type);

    // Only a bare file name is accepted, so "../" cannot escape the log directory.
    $showfile = basename($showfile);
    $path = $folder . $showfile;
    $fp = (is_file($path) && is_readable($path)) ? fopen($path, 'r') : false;
    if ($fp === false) {
        return 'The log file does not exist, please check the settings!';
    }

    $records = [];
    $t = 0; // 200 responses
    $h = 0; // 304 responses
    $y = 0; // 404 responses
    while (($line = fgets($fp)) !== false) {
        // Filter before converting: only matching lines need the charset conversion.
        if (strpos($line, $type) === false) {
            continue;
        }
        $fields = array_pad(explode(' ', rtrim(iis_to_gb2312($line), "\r\n")), 12, '');
        if ($fields[11] === '200') {
            $t++;
        } elseif ($fields[11] === '304') {
            $h++;
        } elseif ($fields[11] === '404') {
            $y++;
        }
        $records[] = $fields;
    }
    fclose($fp);

    // Paging runs backwards through the records; the lower bound never goes
    // below 0, so the last page still includes the oldest records.
    $pagesize = max(1, (int) $pagesize);
    $count = count($records);
    $pagecount = max(1, (int) ceil($count / $pagesize));
    $page = min(max(1, (int) $page), $pagecount);
    $first = $count - ($page - 1) * $pagesize - 1;
    $last = max(0, $count - $page * $pagesize);

    $show = '';
    for ($i = $first; $i >= $last; $i--) {
        $num = $records[$i];
        $url = iis_escape($num[5]);
        $show .= '<tr onmouseout="this.style.backgroundColor=\'#FFFFFF\'"'
            . ' onmouseover="this.style.backgroundColor=\'#F6F6F6\'">'
            . '<td class="c" width="200;">' . iis_escape($num[0] . ' ' . $num[1]) . '</td>'
            . '<td class="c">' . iis_escape($num[9]) . '</td>'
            . '<td class="pl"><a href="' . iis_escape($domain) . $url . '" target="_blank">' . $url . '</a></td>'
            . '<td class="c">' . iis_escape($num[11]) . '</td>'
            . "</tr>\n";
    }
    unset($records);

    // Pagination bar. Links are relative ("?...") so they keep working if the file is renamed.
    $base = '?type=' . rawurlencode(base64_encode($type)) . '&amp;showfile=' . rawurlencode($showfile);
    $showpage = '<td colspan="4" height="30" align="center"> per page ' . $pagesize
        . ' Article current ' . $page . '/' . $pagecount . ' ';
    $showpage .= '<a href="' . $base . '"> Home </a>';
    if ($page != 1) {
        $showpage .= '<a href="' . $base . '&amp;page=' . ($page - 1) . '"> Prev </a>';
    }
    if ($page != $pagecount) {
        $showpage .= '<a href="' . $base . '&amp;page=' . ($page + 1) . '"> next page </a>';
        $showpage .= '<a href="' . $base . '&amp;page=' . $pagecount . '"> Last </a>';
    }
    $showpage .= '</td>';

    $htmltitle = 'Cowboy IIS log spider Crawl Record Analyzer eggplant modified version'; // Please keep it, thank you!

    return iis_fill_template(pagehtml(), [
        'title' => $title,
        'htmltitle' => $htmltitle,
        'show' => $show,
        'count' => $count,
        'page' => $showpage,
        'y' => $y,
        't' => $t,
        'h' => $h,
        'copy' => Mycopy(),
    ]);
}

/**
 * Return the HTML template of the log index page.
 *
 * The caller replaces [Showlog] with the table rows (one complete <tr> per
 * log file: a date cell plus seven engine cells) and [Copy] with the notes block.
 *
 * @return string
 */
function indexhtml()
{
    return '<html>
<head>
<meta http-equiv="Content-Language" content="zh-cn">
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>Cowboy IIS log spider Crawl Record analyzer v1.1</title>
<style>
<!--
td { font-size: 12px; font-family: serif; }
tr td p a:link, tr td p a:visited { color: #000000; text-decoration: none; }
-->
</style>
</head>
<body>
<table border="1" width="100%" id="table1" cellspacing="0" cellpadding="0" style="border-collapse:collapse">
<tr>
<td colspan="8" bgcolor="#808080" height="30" align="center"><b><font color="#FFFFFF" size="3">Cowboy IIS Log spider Crawl Record Analyzer eggplant modified version</font></b></td>
</tr>
<tr>
<td height="25" align="center" width="10%">Date</td>
<td colspan="7" height="25" align="center">Engine</td>
</tr>
[Showlog]
</table>
[Copy]
</body>
</html>';
}
/**
 * Return the HTML template of the spider report page.
 *
 * The template only carries placeholders; Show() substitutes them:
 * [title] spider name, [HTMLTitle] program title, [Count] number of matching
 * log lines, [t] / [Y] / [H] number of 200 / 404 / 304 responses,
 * [Show] the record rows (complete <tr> elements), [Page] the pagination
 * cell and [Copy] the notes block.
 *
 * @return string
 */
function pagehtml()
{
    return '<html>
<head>
<meta http-equiv="Content-Language" content="zh-cn">
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>[title] Spider Crawl Analysis-[HTMLTitle]</title>
<style>
<!--
td { font-size: 12px; height: 18px; }
.c { text-align: center; }
.pl { padding-left: 15px; }
tr td p a:link, tr td p a:visited { color: #000000; text-decoration: none; }
-->
</style>
</head>
<body>
<table border="1" width="100%" id="table1" cellspacing="0" cellpadding="0" style="border-collapse:collapse">
<tr>
<td><a href="iis.php">Return log directory</a> | <a href="http://www.dj965.com">dj965</a></td>
<td colspan="3" bgcolor="#808080" height="30" align="center"><font color="#FFFFFF" size="3"><b>[title] spider crawling Analysis</b></font></td>
</tr>
<tr>
<td colspan="4" height="25" align="center">This log [title] Spiders crawl <b>[Count]</b> times, where normal <b>[t]</b>, dead chain <b><font color="Red">[Y]</font></b>, Cache <b>[H]</b></td>
</tr>
<tr>
<td align="center" width="200px;"><font color="#FF0000">Time</font></td>
<td align="center" width="150px;"><font color="#FF0000">Spider ip</font></td>
<td align="center"><font color="#FF0000">was crawled url</font></td>
<td align="center" width="100px;"><font color="#FF0000">Crawling results</font></td>
</tr>
[Show]
<tr>
[Page]
</tr>
</table>
[Copy]
</body>
</html>';
}

/**
 * Return the static notes / credits table appended to every page.
 *
 * It explains the three counters of the report (200 normal, 404 dead chain,
 * 304 cache) and credits the original author and the modifier.
 *
 * @return string
 */
function Mycopy()
{
    return '<table border="1" width="100%" id="table2" cellspacing="0" cellpadding="0" style="border-collapse:collapse" height="402">
<tr>
<td height="25" bgcolor="#C0C0C0" align="center"><b>notes</b></td>
</tr>
<tr>
<td height="170">
<p>Normal: Indicates that the page spider access Normal, and has been downloaded. Crawling state returns 200.</p>
<p>dead chain: The spider access to the face page does not exist or link errors, crawling state returned 404.</p>
<p>cache: Indicates that the spider has been crawled before and the page has not been updated, the spider buffer already exists the file, no longer download the page content. Crawling state returns 304.</p>
<p>Note: Spiders crawl through the face page does not necessarily put out, because the spider crawled back to the data must be filtered after the engine rules will be released, as for details please check the engine included help.</p>
</td>
</tr>
<tr>
<td>
<p>Program Name: <a target="_blank" href="http://tarr.cn/?p=23">Cowboy IIS Log Spider Crawl Record Analyzer-Eggplant modified version</a> modified by: <a href="http://www.tarr.cn/" target="_blank">Eggplant brown</a></p>
<p>*******************************************************</p>
<p>Original program name: <a target="_blank" href="http://www.niuzi.com/">Cowboy IIS Log Spider Crawl Record analyzer</a></p>
<p>Original Author: Cowboy</p>
<p>qq:172379201</p>
<p>email:17gd$163.com ($ convert @)</p>
<p>Note: This program is only for everyone to learn to use, do not be used for commercial purposes.</p>
</td>
</tr>
</table>
';
}