A PHP web page snapshot capture program (reposted)

Source: Internet
Author: User

This is a reposted PHP program that captures snapshots of web pages. The code itself is reposted from its original author; the comments were added by the poster to help new users understand it.

snap.class.php

<?php
// ====================================================================
// Filename:     snap.class.php
// Summary:      web page snapshot
// Author:       millken (lost Lincoln)
// Lastmodified: 2007-06-29
// Copyright (c) 2007 millken@gmail.com
// ====================================================================

/**
 * Fetches a web page over HTTP(S), rewrites its relative links to absolute
 * ones, strips <script> elements and caches the result on disk for a
 * configurable number of seconds.
 *
 * Typical use: $s = new snap(); $s->fetch($url, 60); echo $s->contents;
 */
class snap
{
    /** @var string Directory (with trailing slash) the cache files are written to. */
    public $dir;
    /** @var string HTML-formatted trace of what the object did. */
    public $log;
    /** @var string|null Snapshot content (from the cache or freshly fetched). */
    public $contents;
    /** @var string Full path of the cache file. */
    public $filename;
    /** @var string Root of the target site, e.g. "http://example.com/". */
    public $host;
    /** @var string Cache file name: the MD5 hash of the URL. */
    public $name;
    /** @var int Modification time of the cache file, 0 when there is none. */
    public $data_ts;
    /** @var int Cache lifetime in seconds. */
    public $ttl;
    /** @var string URL of the page being snapshotted. */
    public $url;
    /** @var int Age of the cache file in seconds at the last lookup. */
    public $ts;

    /**
     * Initialise the log and point the cache at the directory of this file.
     *
     * A real __construct() is required: PHP 8 no longer treats a method named
     * after the class as a constructor, which left $dir and $log unset.
     */
    public function __construct()
    {
        $this->log = "New snap() object instantiated.<br />\n";
        $this->dir = dirname(__FILE__) . "/";
    }

    /**
     * Legacy PHP 4 style constructor name, kept so existing explicit calls
     * such as parent::snap() keep working. Re-runs the initialisation.
     */
    public function snap()
    {
        $this->__construct();
    }

    /**
     * Obtain a snapshot of a web page.
     *
     * Within $ttl seconds of the last download the cached copy is used;
     * afterwards the page is downloaded again and the cache refreshed.
     * The result is left in $this->contents.
     *
     * @param string $url Absolute http:// or https:// address of the page.
     * @param int    $ttl Cache lifetime in seconds.
     * @return bool True when a snapshot was obtained, false otherwise.
     */
    public function fetch($url = "", $ttl = 10)
    {
        $this->log .= "------------------------------<br />fetch() called<br />\n";
        if (!is_string($url) || $url === '') {
            $this->log .= "Oops: You need to pass a URL!<br />";
            return false;
        }
        // The URL is untrusted and the log is HTML, so escape it.
        $this->log .= "url: " . htmlspecialchars($url, ENT_QUOTES) . "<br />\n";

        // Only absolute http(s) URLs are accepted. Anything else (a bare path,
        // file://, php://, ...) would make the download step read local
        // resources instead of a web page.
        $parts = parse_url($url);
        $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
        if (($scheme !== 'http' && $scheme !== 'https') || empty($parts['host'])) {
            $this->log .= "Oops: only absolute http:// or https:// URLs can be fetched<br />";
            return false;
        }

        $port = isset($parts['port']) ? ':' . $parts['port'] : '';
        $this->host = $scheme . '://' . $parts['host'] . $port . '/';
        $this->ttl = (int) $ttl;
        $this->url = $url;
        $this->name = md5($this->url);
        $this->filename = $this->dir . $this->name;
        $this->log .= "filename: " . $this->filename . "<br />";

        $this->getfile_ts();
        return $this->file_get_content();
    }

    /**
     * Load the snapshot content into $this->contents.
     *
     * A cache file younger than the TTL is read directly; otherwise the page
     * is downloaded from $this->url and written back to the cache.
     *
     * @return bool True on success, false when the page could not be downloaded.
     */
    public function file_get_content()
    {
        $this->ts = time() - $this->data_ts;

        if ($this->data_ts != 0 && $this->ts <= $this->ttl) {
            $this->log .= "cache hasn't expired <br />";
            // '@' keeps the warning out of the page; failure is handled below.
            $cached = @file_get_contents($this->filename);
            if ($cached !== false) {
                $this->contents = $cached;
                return true;
            }
            $this->log .= "could not read " . $this->filename . ", fetching again <br />";
        } else {
            $this->log .= "cache has expired <br />";
        }

        $remote = @file_get_contents($this->url);
        if ($remote === false) {
            // Do not overwrite the cache with an empty page on a failed download.
            $this->log .= "could not fetch the url <br />";
            $this->contents = '';
            return false;
        }

        $this->contents = $remote;
        // A cache write failure is logged by savetocache(); the snapshot in
        // $this->contents is still usable, so it is not treated as fatal here.
        $this->savetocache();
        return true;
    }

    /**
     * Rewrite the links in $this->contents, strip scripts and store the
     * result in the cache file.
     *
     * The content is processed before the file is touched, so $this->contents
     * is the same whether or not the cache directory is writable.
     *
     * @return bool True when the cache file was written, false otherwise.
     */
    public function savetocache()
    {
        $this->log .= "savetocache() called <br />";

        $html = $this->formaturl((string) $this->contents, $this->host);
        $stripped = preg_replace('#<script[^>]*?>.*?</script>#si', '', $html);
        if ($stripped === null) {
            // preg_replace() returns null on a PCRE error (e.g. backtrack limit).
            $this->log .= "could not strip scripts <br />";
        } else {
            $html = $stripped;
        }
        $this->contents = $html;

        // LOCK_EX prevents concurrent requests from interleaving their writes.
        if (@file_put_contents($this->filename, $html, LOCK_EX) === false) {
            $this->log .= "could not write to " . $this->filename . "<br />";
            return false;
        }
        return true;
    }

    /**
     * Read the modification time of the cache file into $this->data_ts
     * (0 when the file is missing or its time cannot be read).
     *
     * @return bool True when a timestamp was obtained, false otherwise.
     */
    public function getfile_ts()
    {
        $this->log .= "getfile_ts() called <br />";
        if (!file_exists($this->filename)) {
            $this->data_ts = 0;
            $this->log .= $this->filename . " does not exist <br />";
            return false;
        }
        $mtime = filemtime($this->filename);
        $this->data_ts = ($mtime === false) ? 0 : $mtime;
        return $mtime !== false;
    }

    /**
     * Make the src/href of every <img>, <link> and <a> tag in a document
     * absolute.
     *
     * NOTE(review): fetch() passes only the site root as $l2, so relative
     * links are resolved against the root rather than the page's own
     * directory - confirm whether that is intended.
     *
     * @param string $l1 HTML document.
     * @param string $l2 Base URL the links are resolved against.
     * @return string The document with rewritten tags.
     */
    public function formaturl($l1, $l2)
    {
        $pattern = '/<(?:img[^>]+src|(?:link|a)[^>]+href)=(?:"[^"]+"|\'[^\']+\')[^>]*>/i';
        if (!preg_match_all($pattern, $l1, $found, PREG_OFFSET_CAPTURE)) {
            return $l1;
        }

        // Rebuild the document in a single pass so each tag is rewritten
        // exactly once, at the position where it was found.
        $result = '';
        $cursor = 0;
        foreach ($found[0] as $hit) {
            $tag = $hit[0];
            $offset = $hit[1];
            $result .= substr($l1, $cursor, $offset - $cursor) . $this->liiiil($tag, $l2);
            $cursor = $offset + strlen($tag);
        }
        return $result . substr($l1, $cursor);
    }

    /**
     * Rewrite the href/src attribute of a single tag to an absolute URL.
     *
     * For example, with base "http://xxx.com/" the relative address
     * "/css/sty004.css" becomes "http://xxx.com/css/sty004.css". Values that
     * already carry a scheme (http:, mailto:, javascript:, data:, ...), are
     * protocol-relative ("//host/...") or are pure fragments ("#top") are
     * left untouched, as is a tag without an href/src attribute.
     *
     * @param string $l1 The tag, e.g. '<a href="x.html">'.
     * @param string $l2 Base URL (scheme, host and optionally port/path).
     * @return string The tag with its first href/src made absolute.
     */
    public function liiiil($l1, $l2)
    {
        // First href/src that is a real attribute: the whitespace look-behind
        // skips look-alikes such as data-src. Quoted values may contain spaces.
        $attr = '/(?<=\s)(?:href|src)=("[^"]*"|\'[^\']*\'|[^\s>]+?(?=\s|\/?>))/i';
        if (!preg_match($attr, $l1, $m, PREG_OFFSET_CAPTURE)) {
            return $l1;
        }
        $raw = $m[1][0];
        $rawOffset = $m[1][1];

        $value = trim(trim($raw, "\"'"));
        if ($value === '' || $value[0] === '#') {
            return $l1;
        }
        if (preg_match('/^(?:[a-z][a-z0-9+.\-]*:|\/\/)/i', $value)) {
            return $l1;
        }

        $base = parse_url($l2);
        if (!is_array($base)) {
            return $l1;
        }
        $root = isset($base['scheme']) && $base['scheme'] !== '' ? $base['scheme'] . '://' : '';
        $root .= isset($base['host']) ? $base['host'] : '';
        if ($root === '') {
            return $l1;
        }
        if (isset($base['port'])) {
            $root .= ':' . $base['port'];
        }
        $dir = isset($base['path']) ? $this->parentDir($base['path']) : '';

        if ($value[0] === '/') {
            // Root-relative path.
            $absolute = $root . $value;
        } else {
            // Consume leading "./" and "../" segments, climbing one directory
            // for each "../".
            while (true) {
                if (strncmp($value, './', 2) === 0) {
                    $value = (string) substr($value, 2);
                } elseif (strncmp($value, '../', 3) === 0) {
                    $value = (string) substr($value, 3);
                    $dir = $this->parentDir($dir);
                } else {
                    break;
                }
            }
            $absolute = $root . $dir . '/' . $value;
        }

        // Replace only the matched attribute value, always double-quoted.
        $quoted = '"' . str_replace('"', '%22', $absolute) . '"';
        return substr_replace($l1, $quoted, $rawOffset, strlen($raw));
    }

    /**
     * Parent directory of a URL path without a trailing slash; '' for the root.
     *
     * @param string $path
     * @return string
     */
    private function parentDir($path)
    {
        // dirname() yields "\" for the root on Windows and "." for bare names.
        $dir = str_replace('\\', '/', dirname($path));
        return ($dir === '.') ? '' : rtrim($dir, '/');
    }
}
?>

test.php

<?php
// Demo: print the snapshot of the page whose address is passed as ?url=...
require_once dirname(__FILE__) . '/snap.class.php';

// A missing or non-string "url" parameter becomes an empty string, which
// fetch() rejects, instead of raising an undefined-index warning.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';

$h = new snap();
$h->fetch($url);
// echo $h->log; // uncomment to trace what the object did
echo $h->contents;
?>

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please email us and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.