We often have to dig through a lot of server logs to see how search-engine spiders are crawling a site, so I wanted a plug-in that records spider visits directly. My first version only counted the number of visits, which was not enough for real analysis, so yesterday I rewrote it properly. It now has more features: it keeps statistics per search engine, and the results can be viewed for several time periods. The code is actually very simple; to keep things small, it is compressed to about 6 KB and divided into 6 files.
1. Installer: spilder_install.php
The code is as follows:
Install plug-ins
// Installer: runs only when the install form is submitted with act=install.
// It connects to MySQL, (re)creates the {prefix}sp_count table, writes the
// submitted settings into spilder_config.php and drops an isinstall.txt marker.
// Every step stops the script on failure so a half-finished install is never
// reported as a success.
//
// NOTE(review): ext/mysql (mysql_* functions) was removed in PHP 7. It is kept
// here so all scripts of the plug-in stay consistent; migrate them together to
// mysqli or PDO when the plug-in has to run on a current PHP.
if (isset($_POST['act']) && is_string($_POST['act']) && strtolower(trim($_POST['act'])) === 'install') {
    // Collect the submitted settings; a missing field becomes an empty string.
    // NOTE(review): 'sp_admin' is assumed to be the name of the form field that
    // carries the admin password - the form itself is not visible here; confirm.
    $settings = array();
    foreach (array('mysql_host', 'mysql_user', 'mysql_pwd', 'mysql_db', 'table_prefix', 'sp_admin') as $key) {
        $settings[$key] = (isset($_POST[$key]) && is_string($_POST[$key])) ? trim($_POST[$key]) : '';
    }
    $table_prefix = $settings['table_prefix'];

    // The prefix is spliced into SQL identifiers, so only identifier characters are allowed.
    if (!preg_match('/^[A-Za-z0-9_]*$/', $table_prefix)) {
        exit("<script>alert('Invalid table prefix! Use letters, digits and underscores only.');history.go(-1);</script>");
    }

    $link = mysql_connect($settings['mysql_host'], $settings['mysql_user'], $settings['mysql_pwd']);
    if (!$link) {
        // Stop here: nothing below can work without a connection.
        exit("<script>alert('link error! Check the database server configuration!');history.go(-1);</script>");
    }
    echo "the connection to the server is successful!.................<br>";

    if (!mysql_select_db($settings['mysql_db'], $link)) {
        exit("<script>alert('database error! Check the database name!');history.go(-1);</script>");
    }
    echo "the database is connected successfully!.............<br>Creating table................<br>";

    $table = $table_prefix . 'sp_count';
    $sql = "CREATE TABLE `{$table}` (
`id` bigint(20) NOT NULL auto_increment,
`r_time` int(11) NOT NULL,
`r_name` varchar(50) NOT NULL,
`r_url` varchar(200) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=gb2312 AUTO_INCREMENT=4;";

    // Re-installing discards any previously recorded data.
    mysql_query("DROP TABLE IF EXISTS `{$table}`", $link);
    if (!mysql_query($sql, $link)) {
        exit("<script>alert('table creation failed! Check the database permissions!');history.go(-1);</script>");
    }
    echo "table created successfully! Writing file<br>";

    $f = file_get_contents("spilder_config.php"); // current configuration content
    if ($f === false) {
        exit("<script>alert('cannot read spilder_config.php!');history.go(-1);</script>");
    }

    foreach ($settings as $name => $value) {
        // Match "name = <quoted string or anything up to ;>;" with optional
        // whitespace, so the file no longer needs to have all spaces stripped.
        $pattern = '/\b' . preg_quote($name, '/')
            . '\s*=\s*(?:\'(?:[^\'\\\\]|\\\\.)*\'|"(?:[^"\\\\]|\\\\.)*"|[^;]*?)\s*;/is';
        // var_export() yields a correctly quoted PHP string literal, so quotes or
        // backslashes in a value cannot break out of the assignment.
        $assignment = $name . ' = ' . var_export($value, true) . ';';
        // "\" and "$" are special inside a preg_replace() replacement string.
        $replacement = strtr($assignment, array('\\' => '\\\\', '$' => '\\$'));
        $updated = preg_replace($pattern, $replacement, $f);
        if ($updated === null) {
            exit("<script>alert('cannot update spilder_config.php!');history.go(-1);</script>");
        }
        $f = $updated;
    }

    if (file_put_contents("spilder_config.php", $f) === false) {
        exit("<script>alert('cannot write spilder_config.php! Check the file permissions!');history.go(-1);</script>");
    }
    file_put_contents("isinstall.txt", "OK"); // marker file: installation has been done
    echo "Congratulations! The spider statistics program has been installed.";
    exit();
}
?>
2. Spider record files
The code is as follows:
// Spider recorder: when the current request comes from a known search-engine
// spider, store one row (time, spider name, requested URL) in {prefix}sp_count.
// NOTE(review): presumably this file is included from the site's pages - confirm.
// For that reason a database problem is logged and skipped instead of calling
// die(), which would blank the page for the very crawlers being tracked.
//
// NOTE(review): ext/mysql (mysql_* functions) was removed in PHP 7; kept for
// consistency with the other scripts of the plug-in. Migrate to mysqli/PDO together.
require("spilder_config.php"); // supplies $mysql_host, $mysql_user, $mysql_pwd, $mysql_db, $table_prefix

$searchbot = get_naps_bot();
if ($searchbot) {
    // Only spiders need the database, so ordinary visitors never open a connection.
    $link = mysql_connect($mysql_host, $mysql_user, $mysql_pwd);
    if ($link && mysql_select_db($mysql_db, $link)) {
        date_default_timezone_set('PRC'); // same default time zone as the statistics page
        $r_time = time();                 // visit time as a Unix timestamp

        // Rebuild the requested URL from the server variables.
        $is_https    = !empty($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) !== 'off';
        $ServerName  = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : '';
        $ServerPort  = isset($_SERVER['SERVER_PORT']) ? (string) $_SERVER['SERVER_PORT'] : '';
        $ScriptName  = isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : '';
        $QueryString = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';

        $url = ($is_https ? 'https://' : 'http://') . $ServerName;
        // Append the port only when it is not the default one for the scheme.
        if ($ServerPort !== '' && $ServerPort !== ($is_https ? '443' : '80')) {
            $url = $url . ":" . $ServerPort;
        }
        $url = $url . $ScriptName;
        if ($QueryString != "") {
            $url = $url . "?" . $QueryString;
        }

        // The query string is attacker-controlled: escape everything that goes into the SQL text.
        $sql = sprintf(
            "insert into %ssp_count (r_time, r_name, r_url) values (%d, '%s', '%s')",
            $table_prefix,
            $r_time,
            mysql_real_escape_string($searchbot, $link),
            mysql_real_escape_string($url, $link)
        );
        if (!mysql_query($sql, $link)) {
            error_log('spider recorder: insert failed: ' . mysql_error($link));
        }
    } else {
        error_log('spider recorder: database connection error');
    }
}
/**
 * Identify which search-engine spider, if any, sent the current request.
 *
 * The User-Agent header is lower-cased and searched for a known spider token;
 * tokens are checked in the order listed and the first hit wins.
 *
 * @return string|false Spider name ('Googlebot', 'MSNbot', 'Yahoobot',
 *                      'Baiduspider', 'Sogoubot', 'Sosobot' or 'Youdao'),
 *                      or false when the header is missing or matches no token.
 */
function get_naps_bot()
{
    // Requests without a User-Agent header cannot be classified.
    if (!isset($_SERVER['HTTP_USER_AGENT']) || !is_string($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }
    $useragent = strtolower($_SERVER['HTTP_USER_AGENT']);

    // Lower-case token to look for => name stored in the statistics table.
    $signatures = array(
        'googlebot'   => 'Googlebot',
        'msnbot'      => 'MSNbot',
        'bingbot'     => 'MSNbot', // Bing's current crawler token; counted with MSNbot
        'slurp'       => 'Yahoobot',
        'baiduspider' => 'Baiduspider',
        'sogou'       => 'Sogoubot',
        'soso'        => 'Sosobot',
        'youdao'      => 'Youdao',
    );

    foreach ($signatures as $token => $name) {
        // strpos() returns 0 for a match at the very start of the string, so the
        // comparison has to be strict; "!= false" would treat that hit as a miss.
        if (strpos($useragent, $token) !== false) {
            return $name;
        }
    }
    return false;
}
?>
3. view the spider statistics file
The code is as follows:
// Statistics page, query part: handles admin login/logout, optional deletion of
// old records, the spider-category filter (kept in the session) and the time
// filter, then runs the query whose result ($res) the template below renders.
//
// Request parameters (all via GET):
//   ac=logout            clear the admin session
//   sp_admin_login=PWD   log in as admin when PWD equals $sp_admin from the config
//   act=del&dt=N         admin only: N=0 delete everything, 1..5 delete rows older
//                        than 30/60/90/180/360 days
//   ss=N                 1..7 select a spider category, 999 clear the selection
//   t=N                  0 record list (paged), 1 today, 2 yesterday, 3 day before
//                        yesterday, 4 last 7 days, 5 last 30 days, 6 last 90 days
//   page=N               page of the record list
//
// NOTE(review): ext/mysql (mysql_* functions) was removed in PHP 7. It is kept
// because the template below reads $res with mysql_fetch_array(); migrate both together.
// NOTE(review): deleting through a GET link is open to CSRF; consider a POST form with a token.
session_start();
require("spilder_config.php");

if (!isset($_SESSION['ss_sp_admin'])) {
    $_SESSION['ss_sp_admin'] = '';
}
if (isset($_GET['ac']) && $_GET['ac'] === 'logout') {
    $_SESSION['ss_sp_admin'] = ''; // clear
}
$sp_admin = isset($sp_admin) ? (string) $sp_admin : '';
if (isset($_GET['sp_admin_login']) && is_string($_GET['sp_admin_login'])) {
    // An empty configured password must never allow a login.
    if ($sp_admin !== '' && $_GET['sp_admin_login'] === $sp_admin) {
        $_SESSION['ss_sp_admin'] = $sp_admin;
    }
}
$is_admin = ($sp_admin !== '' && $_SESSION['ss_sp_admin'] === $sp_admin);

date_default_timezone_set('PRC'); // default time zone used for the day boundaries
$day_start = strtotime(date("Y-m-d") . " 00:00:00"); // first second of today
$day_out   = strtotime(date("Y-m-d") . " 23:59:59"); // last second of today
$day       = 3600 * 24;                              // length of one day in seconds

$link = mysql_connect($mysql_host, $mysql_user, $mysql_pwd) or die("server connection error");
mysql_select_db($mysql_db, $link) or die('database connection error'); // connect to the database

// --- deletion (admin only) ---------------------------------------------------
if (isset($_GET['act']) && is_string($_GET['act']) && strtolower($_GET['act']) === 'del' && $is_admin) {
    // dt must be an explicit non-negative integer: a missing or malformed value
    // must not be read as 0, because 0 means "delete everything".
    $dt = (isset($_GET['dt']) && is_string($_GET['dt']) && preg_match('/^[0-9]+$/', $_GET['dt']))
        ? (int) $_GET['dt']
        : -1;
    $retention_days = array(1 => 30, 2 => 60, 3 => 90, 4 => 180, 5 => 360);
    if ($dt === 0) {
        mysql_query("delete from {$table_prefix}sp_count", $link); // clear all records
    } elseif (isset($retention_days[$dt])) {
        $d_time = $day_out - $day * $retention_days[$dt];
        mysql_query("delete from {$table_prefix}sp_count where r_time < {$d_time}", $link); // delete old records
    }
}

// --- spider category filter (remembered in the session) ------------------------
$spider_names = array(
    1 => 'Baiduspider',
    2 => 'Googlebot',
    3 => 'MSNbot',
    4 => 'Yahoobot',
    5 => 'Sogoubot',
    6 => 'Sosobot',
    7 => 'Youdao',
);
$ss = (isset($_GET['ss']) && is_scalar($_GET['ss'])) ? intval($_GET['ss']) : 0;
if ($ss === 999) {
    $_SESSION['ss_ss'] = ''; // clear the category
} elseif (isset($spider_names[$ss])) {
    $_SESSION['ss_ss'] = $ss; // only known categories are stored
}
$selected = isset($_SESSION['ss_ss']) ? $_SESSION['ss_ss'] : '';
// $ss becomes the spider name to filter on, or '' for "all spiders".
$ss = ($selected !== '' && isset($spider_names[$selected])) ? $spider_names[$selected] : '';

// --- time filter ---------------------------------------------------------------
// t => array(days to move the window start back, days to move the window end back)
$windows = array(
    1 => array(0, 0),  // today
    2 => array(1, 1),  // yesterday
    3 => array(2, 2),  // the day before yesterday
    4 => array(7, 0),  // last seven days
    5 => array(30, 0), // last 30 days
    6 => array(90, 0), // last 90 days
);
$t = (isset($_GET['t']) && is_scalar($_GET['t'])) ? intval($_GET['t']) : 0;
if (!isset($windows[$t])) {
    $t = 0; // unknown values fall back to the record list so $sql is always defined
}

if ($t === 0) {
    // Record list, newest first, optionally restricted to one spider.
    $where = ($ss === '') ? '' : " where r_name = '" . mysql_real_escape_string($ss, $link) . "'";
    $sql   = "select * from {$table_prefix}sp_count{$where} order by id desc";
    $sql2  = "select count(*) from {$table_prefix}sp_count{$where}";

    $rscount = mysql_query($sql2, $link);
    $rscount = $rscount ? mysql_fetch_array($rscount) : false;
    $rscount = $rscount ? (int) $rscount[0] : 0; // total number of records

    $pagesize  = 30;                                 // records per page
    $pagecount = (int) ceil($rscount / $pagesize);   // total number of pages
    $page = (isset($_GET['page']) && is_scalar($_GET['page'])) ? intval(trim($_GET['page'])) : 1;
    if ($page > $pagecount) {
        $page = $pagecount;
    }
    if ($page < 1) {
        $page = 1;
    }

    // Pager: plain text at the edges, links everywhere else.
    $pageurl = "";
    if ($page <= 1) {
        $pageurl .= "homepage previous page ";
    } else {
        $pageurl .= "<a href=\"?page=1\">homepage</a> <a href=\"?page=" . ($page - 1) . "\">previous page</a> ";
    }
    if ($page >= $pagecount) {
        $pageurl .= "next page last page";
    } else {
        $pageurl .= "<a href=\"?page=" . ($page + 1) . "\">next page</a> <a href=\"?page=" . $pagecount . "\">last page</a>";
    }

    $start_rs = ($page - 1) * $pagesize; // offset of the first row on this page
    $end_rs   = $page * $pagesize;       // offset just past the last row on this page
    // MySQL's LIMIT takes (offset, row count), not (offset, end offset).
    $sql .= " limit {$start_rs}, {$pagesize}";
} else {
    // Per-spider hit counts inside the selected time window.
    list($start_back, $end_back) = $windows[$t];
    $s_time = $day_start - $day * $start_back; // window start
    $e_time = $day_out - $day * $end_back;     // window end
    $sql = "select count(*) as ct, r_name from {$table_prefix}sp_count where r_time between {$s_time} and {$e_time} group by r_name";
}
$res = mysql_query($sql, $link); // or die("install the program first");
?>
Show spider records
View categories |
All Baidu Google Bing Yahoo Sogou Sousearch Youdao |
Time Today Yesterday Day before yesterday Seven days 30 days 90 days
|
If ($ _ SESSION ['SS _ sp_admin'] = $ sp_admin)
{
?>
Management logout |
Clear 30 days ago 60 days ago 90 days ago 180 days ago 360 days ago |
}
?>
If ($ t = 0)
{
?>
Spider |
Time |
Address |
While ($ rs = mysql_fetch_array ($ res )){?>
|
|
"Target =" _ blank "> |
}?>
} Else {
?>
Spider |
Statistics |
While ($ rs = mysql_fetch_array ($ res )){?>
|
|
}?>
}
?>