Fetching the content of a web page:
// Fetch the page at $url and turn its root-relative links into absolute ones.
// $url must already be set by the surrounding code. On any cURL failure
// $text is left as an empty string.
$text = '';
$ch = curl_init($url);
if ($ch !== false) {
    // Ask cURL to return the response body instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $body = curl_exec($ch);
    curl_close($ch);
    // curl_exec() yields false on a transfer error; only rewrite real bodies.
    if (is_string($body)) {
        $text = relative_to_absolute($body, $url);
    }
}
Converting relative paths to absolute paths:
/**
 * Rewrite root-relative href/src attributes in an HTML fragment so that they
 * point at the host of the page the fragment was fetched from.
 *
 * Only double-quoted attribute values that start with a single "/" are
 * rewritten. Protocol-relative values ("//host/...") and values that are
 * already absolute are left untouched.
 *
 * @param string $content  HTML whose links should be rewritten.
 * @param string $feed_url URL the HTML was fetched from; must start with
 *                         http://, https:// or ftp:// for rewriting to happen.
 * @return string The rewritten HTML, or $content unchanged when no scheme and
 *                host can be extracted from $feed_url.
 */
function relative_to_absolute($content, $feed_url)
{
    // Capture the scheme and the authority (everything up to the first
    // "/", "?" or "#") in a single pass.
    if (!preg_match('~^(https?|ftp)://([^/?#]+)~i', $feed_url, $parts)) {
        return $content;
    }
    $base = $parts[1] . '://' . $parts[2];

    // A callback is used so that "$" or "\" in the base is never interpreted
    // as a back-reference in the replacement text.
    $rewritten = preg_replace_callback(
        '~\b((?:href|src)\s*=\s*")/(?!/)~i',
        function ($m) use ($base) {
            return $m[1] . $base . '/';
        },
        $content
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the
    // original markup rather than losing the page.
    return $rewritten === null ? $content : $rewritten;
}
Extracting all hyperlinks:
/**
 * Collect the distinct absolute link targets of the <a> elements in an HTML
 * fragment.
 *
 * Only double-quoted href values of anchors that have a closing </a> tag are
 * considered, and only those starting with an http, https, ftp, telnet or
 * news scheme are kept.
 *
 * @param string $content HTML to scan.
 * @return array Sequentially indexed list of unique URLs in order of first
 *               appearance; an empty array when nothing qualifies.
 */
function get_links($content)
{
    // "s" lets the anchor text span several lines; "i" accepts <A HREF=...>.
    $pattern = '~<a\b[^>]*?\bhref\s*=\s*"([^"]*)"[^>]*>.*?</a>~is';
    if (!preg_match_all($pattern, $content, $found)) {
        return array();
    }

    $output = array();
    foreach (array_unique($found[1]) as $href) {
        // Anchor the scheme test at the start so that a relative path which
        // merely contains "http" is not mistaken for an absolute URL.
        if (preg_match('~^(?:https?|ftp|telnet|news):~i', $href)) {
            $output[] = $href;
        }
    }
    return $output;
}
Extracting all image links:
/**
 * Collect the distinct absolute URLs in a string that end in one of the
 * recognised file extensions.
 *
 * Despite the name, the extension list also covers swf, rar and zip in
 * addition to the image formats jpg, gif, png and bmp.
 *
 * @param string $str Text or HTML to scan.
 * @return array Sequentially indexed list of unique URLs in order of first
 *               appearance; an empty array when nothing matches.
 */
function get_pic($str)
{
    $imgs = array();
    // The lazy "+?" stops at the first recognised extension; the lookahead
    // makes sure that extension is complete (".zip", not ".zipper").
    $pattern = '/(?:https?|ftp|telnet|news):\/\/[a-z0-9\/\-_+=.~!%@?#&;:$\\\\()|]+?'
             . '\.(?:jpg|gif|png|bmp|swf|rar|zip)(?![a-z0-9])/i';
    if (!preg_match_all($pattern, $str, $imgs)) {
        return array();
    }
    // array_unique() keeps the original keys; reindex so callers get a list.
    return array_values(array_unique($imgs[0]));
}