A PHP class for retrieving information about a URL.
Using this class, you can obtain the following information about a URL:
- Host
- Path
- Status code (e.g. 404, 200, ...)
- HTTP version
- Server
- Content type
- Date
- The complete header string returned for the URL
The code of the class is as follows:
<?php
/**
 * Class for getting information about URLs.
 *
 * On construction the URL is split with parse_url() and an HTTP/1.0 GET
 * request is sent to the host; only the response headers are read. The
 * status line and the headers are then available through the getters.
 *
 * @author Sven Wagener <sven.wagener@intertribe.de>
 * @copyright Intertribe limited
 * Collected and organized by the PHP Chinese community, www.phpNet.cn
 * @include Funktion:_include_
 */
class url
{
    /** @var string The complete URL as passed to the constructor. */
    public $url = "";

    /** @var string Host component of the URL ('' when the URL has none). */
    public $url_host = "";

    /** @var string Path component of the URL ('/' when the URL has none). */
    public $url_path = "/";

    /** @var string Not used by this class; kept so existing callers do not break. */
    public $file = "";

    /** @var int|string Status code from the status line, or the socket error number on failure. */
    public $code = "";

    /** @var string Reason phrase from the status line, or the socket error message on failure. */
    public $code_desc = "";

    /** @var string HTTP version taken from the status line, e.g. "1.1". */
    public $http_version = "";

    /** @var string Raw response header block, status line included. */
    public $header_stream = "";

    /** @var array Response headers as name => value; names keep the casing the server sent. */
    public $header_array = [];

    /** @var int Read timeout in seconds applied to the socket. */
    public $timeout = 1;

    /** @var string Lower-cased URL scheme; selects the ssl:// transport and the default port. */
    protected $url_scheme = "http";

    /** @var int Port to connect to. */
    protected $url_port = 80;

    /** @var string Query string of the URL without the leading '?'. */
    protected $url_query = "";

    /**
     * Splits the URL into its components and fetches the header information.
     *
     * @param string $url the complete url
     */
    public function __construct($url)
    {
        $this->url = $url;

        // parse_url() returns false for seriously malformed URLs.
        $parts = parse_url((string) $this->url);
        if (!is_array($parts)) {
            $parts = [];
        }

        $this->url_host = isset($parts['host']) ? $parts['host'] : "";
        $this->url_path = (isset($parts['path']) && $parts['path'] !== "") ? $parts['path'] : "/";
        $this->url_scheme = isset($parts['scheme']) ? strtolower($parts['scheme']) : "http";
        $this->url_query = isset($parts['query']) ? $parts['query'] : "";
        $this->url_port = isset($parts['port'])
            ? (int) $parts['port']
            : $this->default_port();

        $this->refresh_headerinfo();
    }

    /**
     * Legacy PHP 4 style constructor name.
     *
     * `new url(...)` no longer calls a method named after the class in PHP 8,
     * so the real work lives in __construct(); this alias only keeps explicit
     * calls such as parent::url($url) working.
     *
     * @param string $url the complete url
     */
    public function url($url)
    {
        $this->__construct($url);
    }

    /**
     * Returns the whole url.
     *
     * @return string the whole url
     */
    public function get_url()
    {
        return $this->url;
    }

    /**
     * Returns the host of the url.
     *
     * @return string the host of the url
     */
    public function get_url_host()
    {
        return $this->url_host;
    }

    /**
     * Returns the path of the url.
     *
     * @return string the path of the url
     */
    public function get_url_path()
    {
        return $this->url_path;
    }

    /**
     * Returns the status code of the url.
     *
     * @return int|string the status code from the status line, or the socket
     *                    error number when no connection could be made
     */
    public function get_statuscode()
    {
        return $this->code;
    }

    /**
     * Returns the status code description of the url.
     *
     * @return string the reason phrase, or the socket error message
     */
    public function get_statuscode_desc()
    {
        return $this->code_desc;
    }

    /**
     * Returns the http version of the url by the returned headers of the server.
     *
     * @return string the http version
     */
    public function get_info_http_version()
    {
        return $this->http_version;
    }

    /**
     * Returns the server type of the url's host by the returned headers of the server.
     *
     * @return string|null the Server header, or null when it was not sent
     */
    public function get_info_server()
    {
        return $this->find_header('Server');
    }

    /**
     * Returns the date of the url's host by the returned headers of the server.
     *
     * @return string|null the Date header, or null when it was not sent
     */
    public function get_info_date()
    {
        return $this->find_header('Date');
    }

    /**
     * Returns the content type by the returned headers of the server.
     *
     * @return string|null the Content-Type header, or null when it was not sent
     */
    public function get_info_content_type()
    {
        return $this->find_header('Content-Type');
    }

    /**
     * Returns the content of the url without the headers.
     *
     * The content is fetched with a separate request through PHP's URL
     * wrappers, not from the socket used for the headers.
     *
     * @return string the content, or '' when it could not be read
     */
    public function get_content()
    {
        if ((string) $this->url === "") {
            return "";
        }

        $content = file_get_contents($this->url);

        return $content === false ? "" : $content;
    }

    /**
     * Returns the whole header of url without content.
     *
     * @return string the header
     */
    public function get_header_stream()
    {
        return $this->header_stream;
    }

    /**
     * Returns the whole headers of the url in an array.
     *
     * @return array the headers as name => value
     */
    public function get_headers()
    {
        return $this->header_array;
    }

    /**
     * Refreshes the header information.
     *
     * Opens a socket to the host, sends an HTTP/1.0 GET request and reads the
     * response up to the blank line that ends the header block. On success the
     * status code, reason phrase, HTTP version, raw header stream and header
     * array are updated. On failure the code and description carry the socket
     * error instead.
     *
     * @return void
     */
    public function refresh_headerinfo()
    {
        // Start from a clean slate so a failed refresh never exposes stale data.
        $this->header_stream = "";
        $this->header_array = [];
        $this->http_version = "";

        // Without a host there is nothing to connect to.
        if ((string) $this->url_host === "") {
            $this->code = 0;
            $this->code_desc = "Server Not Found";
            return;
        }

        $errno = 0;
        $errstr = "";
        $transport = $this->url_scheme === "https" ? "ssl://" : "";
        $fp = fsockopen($transport . $this->url_host, $this->url_port, $errno, $errstr, 30);

        if (!$fp) {
            // Error number 0 means the failure happened before connect(),
            // which most likely is a host name that could not be resolved.
            if ($errno == 0) {
                $errstr = "Server Not Found";
            }
            $this->code = $errno;
            $this->code_desc = $errstr;
            return;
        }

        // The timeout must be in place before the first read.
        stream_set_timeout($fp, (int) $this->timeout);

        $target = (string) $this->url_path === "" ? "/" : $this->url_path;
        if ($this->url_query !== "") {
            $target .= "?" . $this->url_query;
        }

        // The Host header carries the port only when it is not the default one.
        $host_header = $this->url_host;
        if ($this->url_port !== $this->default_port()) {
            $host_header .= ":" . $this->url_port;
        }

        fwrite($fp, "GET " . $target . " HTTP/1.0\r\nHost: " . $host_header . "\r\n\r\n");

        $status_line = null;
        while (!feof($fp)) {
            $line = fgets($fp);

            // false means timeout or read error; feof() can stay false in that
            // case, so stop here instead of looping forever.
            if ($line === false) {
                break;
            }

            // A blank line terminates the header block.
            if (rtrim($line, "\r\n") === "") {
                break;
            }

            $this->header_stream .= $line;

            if ($status_line === null) {
                $status_line = $line;
                continue;
            }

            // Split on the first colon only: values such as dates contain colons.
            $pair = explode(":", $line, 2);
            if (count($pair) === 2) {
                $this->header_array[trim($pair[0])] = trim($pair[1]);
            }
        }
        fclose($fp);

        if ($status_line === null) {
            $this->code = 0;
            $this->code_desc = "Empty Response";
            return;
        }

        // Status line layout: "HTTP/<version> <code> <reason phrase>".
        $status_parts = explode(" ", rtrim($status_line, "\r\n"), 3);
        $this->code = isset($status_parts[1]) ? $status_parts[1] : "";
        $this->code_desc = isset($status_parts[2]) ? $status_parts[2] : "";

        $protocol = explode("/", $status_parts[0], 2);
        $this->http_version = isset($protocol[1]) ? $protocol[1] : "";
    }

    /**
     * Sets the timeout for getting header data from server.
     *
     * The value is used by the next call to refresh_headerinfo().
     *
     * @param int $seconds time for timeout in seconds
     * @return void
     */
    public function set_timeout($seconds)
    {
        $this->timeout = $seconds;
    }

    /**
     * Looks up a response header by name, ignoring case.
     *
     * HTTP header names are case-insensitive, so the lookup must not depend
     * on the casing the server happened to use.
     *
     * @param string $name header name
     * @return string|null the header value, or null when the header is absent
     */
    protected function find_header($name)
    {
        if (!is_array($this->header_array)) {
            return null;
        }

        foreach ($this->header_array as $key => $value) {
            if (strcasecmp((string) $key, $name) === 0) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Returns the default port for the scheme of the URL.
     *
     * @return int 443 for https, 80 otherwise
     */
    protected function default_port()
    {
        return $this->url_scheme === "https" ? 443 : 80;
    }
}
?>
The class can be used as follows:
<?php
// Usage example for the url class: fetch the headers of one URL and print
// every piece of information the class exposes.

// The class is required for everything below, so fail early if it is missing.
require_once "url.class.php";

$url = new url("http://www.phpnet.cn/");

// Raw header block as returned by the server.
echo $url->get_header_stream();

// Headers as an array; a header is only present when the server sent it.
$headers = $url->get_headers();
if (isset($headers['server'])) {
    echo $headers['server'];
}

// Body of the page.
echo $url->get_content();

// "<br>\n" breaks the line both in a browser and in plain-text output.
echo "URL: " . $url->get_url() . "<br>\n";
echo "URL Host: " . $url->get_url_host() . "<br>\n";
echo "URL Path: " . $url->get_url_path() . "<br>\n<br>\n";

echo "Statuscode: " . $url->get_statuscode() . "<br>\n";
echo "Statuscode description: " . $url->get_statuscode_desc() . "<br>\n";
echo "HTTP Version: " . $url->get_info_http_version() . "<br>\n";
echo "Server: " . $url->get_info_server() . "<br>\n";
echo "Content Type: " . $url->get_info_content_type() . "<br>\n";
echo "Date: " . $url->get_info_date() . "<br>\n<br>\n";

echo "whole headers:<br>\n";
echo $url->get_header_stream();
?>