A reposted PHP web-page snapshot (caching) program. The code is reposted from its original author; the comments were added to help new users understand it.
snap.class.php
<?php
// ====================================================================
// Filename: snap.class.php
// Summary: web page snapshot
// Author: millken (lost Lincoln)
// Last modified: 2007-06-29
// Copyright (c) 2007 millken@gmail.com
// ====================================================================
/**
 * Web page snapshot cache.
 *
 * Downloads a page, rewrites the relative links of <img>, <link> and <a> tags
 * to absolute URLs, strips <script> elements and stores the result in a file
 * named after the MD5 hash of the URL. Within the time-to-live the stored copy
 * is served instead of contacting the remote site again.
 */
class snap
{
    /** @var string Directory (with trailing slash) in which cache files are stored. */
    public $dir = '';

    /** @var string HTML-formatted trace of what the object did. */
    public $log = '';

    /** @var string Content of the snapshot (from the cache or freshly fetched). */
    public $contents = '';

    /** @var string Full path of the cache file. */
    public $filename = '';

    /** @var string Root of the target site, e.g. "http://example.com/". */
    public $host = '';

    /** @var string Name of the cache file: the MD5 hash of the URL. */
    public $name = '';

    /** @var int Modification time of the cache file, 0 when there is none. */
    public $data_ts = 0;

    /** @var int Time-to-live of a cached snapshot, in seconds. */
    public $ttl = 10;

    /** @var string URL of the page to capture. */
    public $url = '';

    /** @var int Age of the cached snapshot in seconds. */
    public $ts = 0;

    /**
     * Starts the log and points the cache directory at this file's directory.
     */
    public function __construct()
    {
        $this->log = "New snap() object instantiated.<br />\n";
        $this->dir = dirname(__FILE__) . "/";
    }

    /**
     * Obtains a snapshot of a web page and stores it in $this->contents.
     *
     * Only absolute http, https and ftp URLs are accepted, so that a
     * user-supplied value can never make the class read a local file or a
     * PHP stream wrapper.
     *
     * @param string $url Address of the page.
     * @param int    $ttl Seconds during which the cached copy is served
     *                    instead of fetching the page again.
     *
     * @return bool True when a snapshot is available, false otherwise.
     */
    public function fetch($url = "", $ttl = 10)
    {
        $this->log .= "--------------------------------<br />fetch() called<br />\n";

        // Validate before touching any component of the URL.
        if (!is_string($url) || $url === '') {
            $this->log .= "OOPS: You need to pass a URL!<br />";
            return false;
        }
        // The log is HTML, so the caller-supplied URL must be escaped.
        $this->log .= "url: " . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . "<br />\n";

        $parts  = parse_url($url);
        $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
        if (!in_array($scheme, array('http', 'https', 'ftp'), true) || empty($parts['host'])) {
            $this->log .= "OOPS: Only absolute http, https or ftp URLs are supported!<br />";
            return false;
        }

        $port           = isset($parts['port']) ? ':' . $parts['port'] : '';
        $this->host     = $parts['scheme'] . '://' . $parts['host'] . $port . '/';
        $this->ttl      = $ttl;
        $this->url      = $url;
        $this->name     = md5($this->url);
        $this->filename = $this->dir . $this->name;
        $this->log     .= "Filename: " . $this->filename . "<br />";

        $this->getfile_ts();

        return $this->file_get_content();
    }

    /**
     * Loads the snapshot into $this->contents.
     *
     * A cached copy that is still within the time-to-live is used directly.
     * Otherwise the page is downloaded and written to the cache. When the
     * download fails, an existing (stale) cached copy is served rather than
     * being overwritten with nothing.
     *
     * @return bool True on success, false when no content could be obtained.
     */
    public function file_get_content()
    {
        $this->ts = time() - (int) $this->data_ts;
        $hasCache = ((int) $this->data_ts !== 0);

        if ($hasCache && $this->ts <= $this->ttl) {
            $this->log .= "cache hasn't expired<br />";
            $cached = @file_get_contents($this->filename);
            if ($cached !== false) {
                $this->contents = $cached;
                return true;
            }
            $this->log .= "Could not read " . $this->filename . "<br />";
        } else {
            $this->log .= "cache is missing or has expired<br />";
        }

        // file_get_contents('') throws on PHP 8, so guard against a missing URL.
        $remote = false;
        if (is_string($this->url) && $this->url !== '') {
            // Best effort: a failing remote site is reported through the log, not a warning.
            $remote = @file_get_contents($this->url);
        }

        if ($remote === false) {
            $this->log .= "Could not fetch the page<br />";
            if ($hasCache) {
                $stale = @file_get_contents($this->filename);
                if ($stale !== false) {
                    $this->log .= "serving the stale cached copy<br />";
                    $this->contents = $stale;
                    return true;
                }
            }
            $this->contents = '';
            return false;
        }

        $this->contents = $remote;
        $this->savetocache();

        return true;
    }

    /**
     * Normalises $this->contents (absolute links, no scripts) and writes it
     * to the cache file.
     *
     * The content is normalised even when the file cannot be written, so the
     * caller always receives the same kind of output.
     *
     * @return bool True when the cache file was written, false otherwise.
     */
    public function savetocache()
    {
        $this->log .= "savetocache() called<br />";

        // Resolve links against the page itself so "pic.png" on /dir/page.html
        // becomes /dir/pic.png; fall back to the site root when no URL is set.
        $base           = ($this->url !== '' && $this->url !== null) ? $this->url : $this->host;
        $this->contents = $this->formaturl($this->contents, $base);

        // preg_replace() returns null on failure; keep the content in that case.
        $stripped = preg_replace('#<script\b[^>]*>.*?</script\s*>#si', '', (string) $this->contents);
        if ($stripped !== null) {
            $this->contents = $stripped;
        }

        if (!is_string($this->filename) || $this->filename === '') {
            $this->log .= "No cache file name set<br />";
            return false;
        }

        // Compare with false explicitly: writing an empty page returns 0 bytes, which is not an error.
        if (@file_put_contents($this->filename, $this->contents, LOCK_EX) === false) {
            $this->log .= "Could not write to " . $this->filename . "<br />";
            return false;
        }

        return true;
    }

    /**
     * Reads the modification time of the cache file into $this->data_ts.
     *
     * @return bool True when the cache file exists, false otherwise
     *              ($this->data_ts is then 0).
     */
    public function getfile_ts()
    {
        $this->log .= "getfile_ts() called<br />";

        $mtime = false;
        if (is_string($this->filename) && $this->filename !== '' && is_file($this->filename)) {
            $mtime = filemtime($this->filename);
        }

        if ($mtime === false) {
            $this->data_ts = 0;
            $this->log    .= $this->filename . " does not exist<br />";
            return false;
        }

        $this->data_ts = $mtime;

        return true;
    }

    /**
     * Rewrites the links of every <img>, <link> and <a> tag in a document to
     * absolute URLs.
     *
     * @param string $l1 HTML document.
     * @param string $l2 Base URL that relative links are resolved against.
     *
     * @return string The document with absolute links.
     */
    public function formaturl($l1, $l2)
    {
        if (!is_string($l1) || $l1 === '') {
            return $l1;
        }

        // Each tag is rewritten exactly where it was matched.
        $rewritten = preg_replace_callback(
            '/<(?:img|link|a)\s[^>]*>/i',
            function ($match) use ($l2) {
                return $this->liiiil($match[0], $l2);
            },
            $l1
        );

        // null signals a PCRE failure; returning the input is the safe fallback.
        return $rewritten === null ? $l1 : $rewritten;
    }

    /**
     * Makes the href/src attribute of a single tag absolute.
     *
     * For example, with the base "http://xxx.com/" the relative address
     * /css/sty004.css becomes http://xxx.com/css/sty004.css.
     *
     * Left untouched: tags without href/src, links that already carry a
     * scheme (http:, mailto:, javascript:, data:, ...), and links that only
     * consist of a fragment (#top).
     *
     * @param string $l1 The tag to rewrite.
     * @param string $l2 Base URL that the link is resolved against.
     *
     * @return string The tag with an absolute link, or the unchanged tag.
     */
    public function liiiil($l1, $l2)
    {
        // The value may be double-quoted, single-quoted or unquoted.
        $pattern = '/(?<![\w\-])(?:href|src)\s*=\s*("[^"]*"|\'[^\']*\'|[^\s>]+)/i';
        if (!preg_match($pattern, $l1, $regs, PREG_OFFSET_CAPTURE)) {
            return $l1;
        }
        $raw    = $regs[1][0];
        $offset = $regs[1][1];

        $quote = '"';
        $link  = $raw;
        if ($raw[0] === '"' || $raw[0] === "'") {
            $quote = $raw[0];
            $link  = (string) substr($raw, 1, -1);
        }
        $link = trim($link);
        if ($link === '') {
            return $l1;
        }

        $base = parse_url($l2);
        if (!is_array($base) || empty($base['host'])) {
            return $l1;
        }
        $scheme   = isset($base['scheme']) ? $base['scheme'] : '';
        $port     = isset($base['port']) ? ':' . $base['port'] : '';
        $root     = ($scheme !== '' ? $scheme . '://' : '') . $base['host'] . $port;
        $basePath = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';
        $dir      = $this->parentpath($basePath);

        // Keep the fragment aside so it survives the rewrite.
        $fragment = '';
        $hash     = strpos($link, '#');
        if ($hash === 0) {
            return $l1;
        }
        if ($hash !== false) {
            $fragment = substr($link, $hash);
            $link     = substr($link, 0, $hash);
        }

        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)) {
            // Already absolute, or not a web address at all.
            return $l1;
        } elseif (strncmp($link, '//', 2) === 0) {
            // Scheme-relative link: only the scheme of the base is needed.
            if ($scheme === '') {
                return $l1;
            }
            $resolved = $scheme . ':' . $link;
        } elseif ($link[0] === '/') {
            // Path relative to the site root.
            $resolved = $root . $link;
        } elseif ($link[0] === '?') {
            // Query-only link: same document, different query string.
            $resolved = $root . $basePath . $link;
        } else {
            // Path relative to the directory of the base document.
            while (true) {
                if (strncmp($link, '../', 3) === 0) {
                    $link = (string) substr($link, 3);
                    $dir  = $this->parentpath($dir);
                } elseif (strncmp($link, './', 2) === 0) {
                    $link = (string) substr($link, 2);
                } else {
                    break;
                }
            }
            $resolved = $root . $dir . '/' . $link;
        }

        // Replace only the matched value, never a look-alike elsewhere in the tag.
        return substr_replace($l1, $quote . $resolved . $fragment . $quote, $offset, strlen($raw));
    }

    /**
     * Returns everything before the last slash of a URL path, without a
     * trailing slash: "/a/b.html" and "/a/" give "/a", "/" gives "".
     *
     * Unlike dirname() the result does not depend on the operating system.
     */
    private function parentpath($path)
    {
        $path  = str_replace('\\', '/', (string) $path);
        $slash = strrpos($path, '/');

        return $slash === false ? '' : (string) substr($path, 0, $slash);
    }
}
?>
test.php
<?php
// Demo entry point: prints the snapshot of the page given in ?url=...
require_once dirname(__FILE__) . '/snap.class.php';

// The parameter is optional and user-controlled: never read it unchecked.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';

$h = new snap();
if ($h->fetch($url) === false) {
    // fetch() returns false when the URL is missing or rejected.
    echo 'Unable to capture a snapshot of the requested URL.';
} else {
    // For debugging, print $h->log here to see what the class did.
    echo $h->contents;
}
?>