<?php
/**************
Class that finds the RSS feed address of a web page or web site.
Usage:
$rss = new getrssurl;
Configure the capture timeout (in seconds):
$rss->timelimit = 60; // one minute
Capture the RSS address(es):
$link = $rss->get("blog.zoneker.com");
**************/
/**
 * Finds the RSS feed address(es) published by a web page.
 *
 * The page is downloaded; if it is itself an RSS document its own address is
 * returned. Otherwise every href on the page is collected, narrowed down to
 * the ones that look like feed addresses, and each remaining candidate is
 * fetched to confirm that it really serves RSS.
 *
 * The public properties expose the intermediate results of the last run.
 */
class getrssurl
{
    /** @var string Address being inspected; processurl() adds a scheme when it is missing. */
    public $url;

    /** @var string Scheme and host (plus port, if any) of $url, without a trailing slash. */
    public $domin;

    /** @var string Body downloaded from $url; '' when the download failed. */
    public $file;

    /** @var array Unique href values found in $file. */
    public $link = array();

    /** @var array Links considered as feed candidates (or $url itself when the page is RSS). */
    public $rsslink = array();

    /** @var array Candidates from $rsslink turned into unique absolute http(s) addresses. */
    public $rssurl = array();

    /** @var array Entries of $rssurl whose content was confirmed to be RSS. */
    public $validrssurl = array();

    /** @var int|null Script time limit in seconds, handed to set_time_limit(); unset or 0 means no limit. */
    public $timelimit;

    /**
     * Applies $timelimit as the script execution time limit.
     */
    public function init()
    {
        // set_time_limit() can be switched off through disable_functions.
        if (function_exists('set_time_limit')) {
            set_time_limit((int) $this->timelimit);
        }
    }

    /**
     * Normalises $url and derives $domin from it.
     *
     * Surrounding whitespace is removed and "http://" is prepended unless the
     * address already starts with http:// or https:// (any letter case).
     */
    public function processurl()
    {
        $address = trim((string) $this->url);
        if (!preg_match('#^https?://#i', $address)) {
            $address = 'http://' . $address;
        }
        $this->url = $address;

        $parts = parse_url($address);
        if (is_array($parts) && isset($parts['host'])) {
            $scheme = isset($parts['scheme']) ? strtolower($parts['scheme']) : 'http';
            $origin = $scheme . '://' . $parts['host'];
            if (isset($parts['port'])) {
                $origin .= ':' . $parts['port'];
            }
            $this->domin = $origin;
        } else {
            // Address could not be parsed: fall back to the address itself without trailing slashes.
            $this->domin = rtrim($address, '/');
        }
    }

    /**
     * Downloads $url into $file.
     */
    public function gethtmlfile()
    {
        $body = file_get_contents($this->url);
        // Store '' on failure so the text searches below always receive a string.
        $this->file = ($body === false) ? '' : $body;
    }

    /**
     * Tells whether the downloaded document is already an RSS feed.
     *
     * @return array|int $rsslink with $url appended when $file contains "<rss"
     *                   (case-insensitive), otherwise 0.
     */
    public function rssorhtml()
    {
        if (stripos((string) $this->file, '<rss') === false) {
            return 0;
        }

        $this->rsslink[] = $this->url;

        return $this->rsslink;
    }

    /**
     * Collects the unique href values of $file into $link (a 0-based list).
     */
    public function getvalidlink()
    {
        $found = array();
        // Accepts single-quoted, double-quoted and unquoted attribute values.
        if (preg_match_all('/href\s*=\s*["\']?([^\s"\'<>]+)/i', (string) $this->file, $matches)) {
            $found = $matches[1];
        }

        // array_unique() keeps the original keys, so re-index afterwards.
        $this->link = array_values(array_unique($found));
    }

    /**
     * Narrows $link down to confirmed RSS addresses.
     *
     * Fills $rsslink (candidates), $rssurl (absolute, de-duplicated candidates)
     * and $validrssurl (candidates whose content contains "<rss").
     */
    public function pregmatchurl()
    {
        $links = is_array($this->link) ? $this->link : array();

        // First pass: keep links that mention feed, rss or xml anywhere.
        $candidates = array();
        foreach ($links as $href) {
            if (preg_match('/feed|rss|xml/i', (string) $href)) {
                $candidates[] = $href;
            }
        }
        // Nothing obvious: every link on the page becomes a candidate.
        if (empty($candidates)) {
            $candidates = $links;
        }
        $this->rsslink = $candidates;

        // Second pass: apply the address heuristic and make the survivors absolute.
        $absolute = array();
        foreach ($candidates as $href) {
            $href = (string) $href;
            if (!$this->looksLikeFeedAddress($href)) {
                continue;
            }
            $resolved = $this->absoluteUrl($href);
            if ($resolved !== null) {
                $absolute[] = $resolved;
            }
        }
        $this->rssurl = array_values(array_unique($absolute));

        // Final pass: fetch each candidate and keep the ones that serve RSS.
        $this->validrssurl = array();
        foreach ($this->rssurl as $candidate) {
            if ($this->servesRss($candidate)) {
                $this->validrssurl[] = $candidate;
            }
        }
    }

    /**
     * Runs the whole discovery for one address.
     *
     * @param string $url Page or site address, with or without a scheme.
     *
     * @return array|false List of RSS addresses, or false when none was found.
     */
    public function get($url)
    {
        $this->url = $url;

        // Start from a clean slate so results of an earlier call do not leak in.
        $this->link = array();
        $this->rsslink = array();
        $this->rssurl = array();
        $this->validrssurl = array();

        $this->init();
        $this->processurl();
        $this->gethtmlfile();

        $direct = $this->rssorhtml();
        if ($direct) {
            return $direct;
        }

        $this->getvalidlink();
        $this->pregmatchurl();

        return empty($this->validrssurl) ? false : $this->validrssurl;
    }

    /**
     * Loose heuristic: the link holds a run of 3-5 letters out of "xmlrsfed"
     * (xml, rss, feed, ...) or a script-like extension built from "phasx"
     * (.php, .asp, .aspx, ...). Deliberately permissive; servesRss() has the
     * final say.
     */
    private function looksLikeFeedAddress($href)
    {
        $urlChar = '[.:\/_0-9a-z~@#$%^&*()?+=!-]';
        $pattern = '/' . $urlChar . '+(?:\.*[xmlrsfed]{3,5}|\.+[phasx]{3,4})/i';

        return preg_match($pattern, $href) === 1;
    }

    /**
     * Turns a link taken from the page into an absolute http(s) address.
     *
     * @return string|null Null for links that cannot point at a feed
     *                     (empty, fragment-only, or a non-http scheme).
     */
    private function absoluteUrl($href)
    {
        if ($href === '' || $href[0] === '#') {
            return null;
        }
        if (preg_match('#^https?://#i', $href)) {
            return $href;
        }
        if (strncmp($href, '//', 2) === 0) {
            // Protocol-relative link: reuse the scheme of the inspected page.
            $scheme = (strncasecmp((string) $this->domin, 'https:', 6) === 0) ? 'https:' : 'http:';

            return $scheme . $href;
        }
        if ($href[0] === '/') {
            return $this->domin . $href;
        }
        if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $href)) {
            // mailto:, javascript:, ftp:, file: and friends are never fetched.
            return null;
        }

        return $this->pageDirectory() . '/' . $href;
    }

    /**
     * Directory of the inspected page, without a trailing slash; relative
     * links are resolved against it.
     */
    private function pageDirectory()
    {
        $path = parse_url((string) $this->url, PHP_URL_PATH);
        if (!is_string($path) || $path === '') {
            return (string) $this->domin;
        }

        $lastSlash = strrpos($path, '/');
        $directory = ($lastSlash === false) ? '' : substr($path, 0, $lastSlash);

        return $this->domin . $directory;
    }

    /**
     * Reads the first 2048 bytes of an address and reports whether they
     * contain "<rss" (case-insensitive).
     */
    private function servesRss($address)
    {
        // Links come from untrusted HTML: only ever open http(s) addresses.
        if (!preg_match('#^https?://#i', $address)) {
            return false;
        }

        // Unreachable candidates are expected, so the warning is suppressed on purpose.
        $handle = @fopen($address, 'r');
        if ($handle === false) {
            return false;
        }

        $head = fread($handle, 2048);
        fclose($handle);

        return is_string($head) && stripos($head, '<rss') !== false;
    }
}
?>