<?php
// ====================================================================
// FileName: snap.class.php
// Summary: Web page snapshot
// Author: millken (lost Lincoln)
// LastModified: 2007-06-29
// Copyright (c) 2007 millken@gmail.com
// ====================================================================
/**
 * Web page snapshot with a simple time-based file cache.
 *
 * fetch() downloads a page, rewrites the href/src attributes of its
 * <a>, <img> and <link> tags to absolute URLs, strips <script> elements and
 * stores the result in a file named md5(url) inside $dir. While that file is
 * younger than the TTL the stored copy is served instead of downloading again.
 *
 * The constructor is declared as __construct(): a method merely named after
 * the class is not treated as a constructor by PHP 8.
 */
class snap
{
    /** @var string Directory (with trailing slash) holding the cache files. */
    public $dir;
    /** @var string HTML-formatted diagnostic log, appended to by every method. */
    public $log = '';
    /** @var string Page contents produced by the last fetch. */
    public $contents = '';
    /** @var string Full path of the cache file for the current URL. */
    public $filename;
    /** @var string "scheme://host[:port]/" of the current URL. */
    public $host;
    /** @var string md5 hash of the current URL, used as cache file name. */
    public $name;
    /** @var int Modification time of the cache file, 0 when there is none. */
    public $data_ts = 0;
    /** @var int Maximum cache age in seconds. */
    public $ttl = 10;
    /** @var string URL passed to the last fetch() call. */
    public $url;
    /** @var int Age of the cache file in seconds at the time of the last lookup. */
    public $ts;

    /**
     * Starts the log and points the cache directory at this file's directory.
     */
    public function __construct()
    {
        $this->log = "New snap() object instantiated.<br/>\n";
        $this->dir = dirname(__FILE__) . "/";
    }

    /**
     * Loads $url into $this->contents, from cache when it is fresh enough.
     *
     * Only absolute http/https URLs are accepted. The URL usually comes
     * straight from the request, and the PHP stream functions would otherwise
     * happily open local paths or php:// wrappers.
     * NOTE(review): internal hosts (localhost, private ranges) are still
     * reachable; add an allow-list if this runs on a network with private
     * services.
     *
     * @param string $url Absolute http(s) URL of the page.
     * @param int    $ttl Maximum age of a cached copy, in seconds.
     * @return bool True when contents were loaded, false otherwise.
     */
    public function fetch($url = "", $ttl = 10)
    {
        $this->log .= "------------------------------<br/>fetch() called<br/>\n";
        if (!is_string($url) || $url === '') {
            $this->log .= "OOPS: You need to pass a URL!<br/>";
            return false;
        }
        // The log is HTML, so the caller-supplied URL must be escaped.
        $this->log .= "url: " . htmlspecialchars($url, ENT_QUOTES) . "<br/>\n";

        $parts = parse_url($url);
        $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
        if (($scheme !== 'http' && $scheme !== 'https') || empty($parts['host'])) {
            $this->log .= "OOPS: Only absolute http(s) URLs can be fetched!<br/>";
            return false;
        }

        $port = isset($parts['port']) ? ':' . $parts['port'] : '';
        $this->host = $parts['scheme'] . '://' . $parts['host'] . $port . '/';
        $this->ttl = (int) $ttl;
        $this->url = $url;
        $this->name = md5($this->url);
        $this->filename = $this->dir . $this->name;
        $this->log .= "Filename: " . $this->filename . "<br/>";

        $this->getFile_ts();
        return $this->file_get_content();
    }

    /**
     * Fills $this->contents from the cache file or, when the cache is missing
     * or older than the TTL, from the remote URL (and then refreshes the cache).
     *
     * A failed download never overwrites an existing cache file; the stale
     * copy is served instead when one is readable.
     *
     * @return bool True when contents were loaded, false when neither the
     *              remote page nor a cached copy could be read.
     */
    public function file_get_content()
    {
        $this->ts = time() - $this->data_ts;
        $hasCache = ($this->data_ts != 0);

        if ($hasCache && $this->ts <= $this->ttl) {
            $this->log .= "cache is still fresh<br/>";
            $cached = @file_get_contents($this->filename);
            if ($cached !== false) {
                $this->contents = $cached;
                return true;
            }
            $this->log .= "could not read " . $this->filename . "<br/>";
        } else {
            $this->log .= "cache is missing or has expired<br/>";
        }

        $fresh = @file_get_contents($this->url);
        if ($fresh === false) {
            $this->log .= "could not fetch the url<br/>";
            $stale = $hasCache ? @file_get_contents($this->filename) : false;
            if ($stale !== false) {
                $this->log .= "serving stale cache instead<br/>";
                $this->contents = $stale;
                return true;
            }
            $this->contents = '';
            return false;
        }

        $this->contents = $fresh;
        $this->saveToCache();
        return true;
    }

    /**
     * Post-processes $this->contents (absolute links, no scripts) and writes
     * the result to the cache file.
     *
     * The contents are processed even when the file cannot be written, so the
     * caller never receives a page with its scripts still in place.
     *
     * @return bool True when the cache file was written, false otherwise.
     */
    public function saveToCache()
    {
        $this->log .= "saveToCache() called<br/>";

        // Resolve links against the page URL itself so that relative links
        // are interpreted relative to the page's directory, not the site root.
        $base = $this->url ? $this->url : $this->host;
        $html = $this->formaturl((string) $this->contents, $base);

        $stripped = preg_replace('#<script\b[^>]*>.*?</script\s*>#si', '', $html);
        if ($stripped !== null) {
            // preg_replace yields null on PCRE errors (e.g. backtrack limit).
            $html = $stripped;
        }
        $this->contents = $html;

        if (@file_put_contents($this->filename, $html, LOCK_EX) === false) {
            $this->log .= "could not write to " . $this->filename . "<br/>";
            return false;
        }
        return true;
    }

    /**
     * Records the modification time of the cache file in $this->data_ts
     * (0 when the file does not exist or cannot be inspected).
     *
     * @return bool True when a cache file with a usable timestamp exists.
     */
    public function getFile_ts()
    {
        $this->log .= "getFile_ts() called<br/>";
        $this->data_ts = 0;
        if (!file_exists($this->filename)) {
            $this->log .= $this->filename . " does not exist<br/>";
            return false;
        }
        $mtime = @filemtime($this->filename);
        if ($mtime === false) {
            return false;
        }
        $this->data_ts = $mtime;
        return true;
    }

    /**
     * Rewrites the link attribute of every <a>, <img> and <link> tag in an
     * HTML string to an absolute URL.
     *
     * @param string $l1 HTML source.
     * @param string $l2 Absolute URL the page was loaded from.
     * @return string HTML with rewritten tags; other text is left untouched.
     */
    public function formaturl($l1, $l2)
    {
        $l1 = (string) $l1;
        if (!preg_match_all('/<(?:img|link|a)\b[^>]*>/i', $l1, $found, PREG_OFFSET_CAPTURE)) {
            return $l1;
        }

        // Rebuild the document in one pass: text between tags is copied
        // verbatim, each matched tag is replaced by its rewritten form.
        $result = '';
        $cursor = 0;
        foreach ($found[0] as $hit) {
            $tag = $hit[0];
            $at = $hit[1];
            $result .= substr($l1, $cursor, $at - $cursor) . $this->lIIIIl($tag, $l2);
            $cursor = $at + strlen($tag);
        }
        return $result . substr($l1, $cursor);
    }

    /**
     * Rewrites the first href/src attribute of a single tag to an absolute URL.
     *
     * The tag is returned unchanged when it has no such attribute, when the
     * value is empty, fragment-only or already carries a scheme (http:,
     * mailto:, javascript:, data:, ...), or when $l2 is not an absolute URL.
     * Quoted values keep their quote character; unquoted values are emitted
     * in double quotes.
     *
     * @param string $l1 One HTML tag, e.g. '<a href="page.html">'.
     * @param string $l2 Absolute URL the page was loaded from.
     * @return string The tag with its link made absolute.
     */
    public function lIIIIl($l1, $l2)
    {
        $l1 = (string) $l1;
        $attribute = '/(?<![\w\-])(?:href|src)\s*=\s*("[^"]*"|\'[^\']*\'|(?:[^\s>\/"\']|\/(?!>))+)/i';
        if (!preg_match($attribute, $l1, $m, PREG_OFFSET_CAPTURE)) {
            return $l1;
        }

        $raw = $m[1][0];
        $start = $m[1][1];
        if ($raw[0] === '"' || $raw[0] === "'") {
            $quote = $raw[0];
            $value = (string) substr($raw, 1, -1);
        } else {
            $quote = '"';
            $value = $raw;
        }

        $absolute = $this->_resolveUrl($value, $l2);
        if ($absolute === null) {
            return $l1;
        }

        // Splice by offset so that only this attribute value is touched.
        return substr($l1, 0, $start) . $quote . $absolute . $quote . substr($l1, $start + strlen($raw));
    }

    /**
     * Resolves a link found in a page against the URL of that page.
     *
     * @param string $ref  Attribute value as found in the HTML.
     * @param string $base Absolute URL of the page.
     * @return string|null Absolute URL, or null when the value must be left as is.
     */
    private function _resolveUrl($ref, $base)
    {
        $ref = trim((string) $ref);
        if ($ref === '' || $ref[0] === '#' || preg_match('/^[a-z][a-z0-9+.\-]*:/i', $ref)) {
            return null;
        }

        $parts = parse_url((string) $base);
        if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {
            return null;
        }
        $origin = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }

        if (substr($ref, 0, 2) === '//') {
            // Protocol-relative link: only the scheme is inherited.
            return $parts['scheme'] . ':' . $ref;
        }
        if ($ref[0] === '/') {
            return $origin . $ref;
        }

        $basePath = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
        if ($ref[0] === '?') {
            return $origin . $basePath . $ref;
        }

        // Directory of the base document: everything up to the last slash.
        $slash = strrpos($basePath, '/');
        $directory = ($slash === false) ? '/' : substr($basePath, 0, $slash + 1);

        // Keep query string and fragment out of the dot-segment handling.
        $tail = '';
        $cut = strcspn($ref, '?#');
        if ($cut < strlen($ref)) {
            $tail = substr($ref, $cut);
            $ref = substr($ref, 0, $cut);
        }

        $segments = explode('/', ltrim($directory . $ref, '/'));
        $last = count($segments) - 1;
        $resolved = array();
        foreach ($segments as $index => $segment) {
            if ($segment === '.' || $segment === '..') {
                if ($segment === '..') {
                    array_pop($resolved);
                }
                if ($index === $last) {
                    // "dir/.." and "dir/." denote directories: keep the slash.
                    $resolved[] = '';
                }
                continue;
            }
            $resolved[] = $segment;
        }

        return $origin . '/' . implode('/', $resolved) . $tail;
    }
}
?>
Usage (test.php):
<?php
// Demo entry point: test.php?url=http://example.com/
// Prints the snapshot of the page named by the "url" query parameter.
require_once(dirname(__FILE__) . '/snap.class.php');

// The parameter may be absent or an array (?url[]=...); only a string is usable.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';

$h = new snap();
if ($h->fetch($url) === false) {
    // fetch() returns false when it rejects the URL.
    echo 'Usage: test.php?url=http://example.com/';
} else {
    // NOTE(review): this echoes third-party HTML under this site's origin.
    // The class strips <script> elements on download, but other active
    // content (e.g. inline event handlers) is not removed.
    // echo $h->log;
    echo $h->contents;
}
?>