Principle: read the source of a web page and use a regular expression to extract all of the links it contains.
<?php
/********** Qiushuiwuhen (2002-5-20) ***********/
/*
 * Link extractor.
 *
 * Downloads the page at $url, finds every href attribute with a regular
 * expression and prints each link as an absolute URL, one per line, in the
 * form "link<N>:<url><br/>".
 *
 * Input:  $url - page to scan. It may be set by a script that includes this
 *                file; when it is empty the default below is used.
 * Output: HTML written with echo. Nothing is returned.
 */

if (empty($url)) {
    $url = "http://www.csdn.net/expert/"; // default url
}

// A "/" inside a query string or fragment must not be mistaken for a path
// separator, so work out the site and directory from the URL without them.
$location = preg_replace('/[?#].*$/s', '', $url);

// Locate the end of the scheme instead of assuming the host starts within the
// first 8 characters: a fixed offset fails on short URLs and on URLs that
// have no path at all.
$schemeEnd = strpos($location, "://");
$scheme    = ($schemeEnd === false) ? "http" : substr($location, 0, $schemeEnd);
$hostStart = ($schemeEnd === false) ? 0 : $schemeEnd + 3;
$pathStart = strpos($location, "/", $hostStart);

if ($pathStart === false) {
    // Bare host such as "http://example.com": the site is the whole URL.
    $site = $location;                  // site, e.g. http://example.com
    $base = $location . "/";            // directory of the file
} else {
    $site = substr($location, 0, $pathStart);                   // site
    $base = substr($location, 0, strrpos($location, "/") + 1);  // directory of the file
}

// Read the whole page. A failed fopen() must be detected up front, otherwise
// the feof() loop would never terminate (or would throw on PHP 8).
$contents = "";
$fp = fopen($url, "r"); // open the url
if ($fp === false) {
    echo "Unable to open " . htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "<br/>";
} else {
    while (!feof($fp)) {
        $chunk = fread($fp, 1024);
        if ($chunk === false) {
            break; // read error: keep whatever was received so far
        }
        $contents .= $chunk;
    }
    fclose($fp);
}

// Match href = value, where the value may be double-quoted, single-quoted or
// unquoted. Group 1 is the link target without the quotes.
$pattern = '|href\s*=\s*["\']?([^"\'\s>]+)|i';
preg_match_all($pattern, $contents, $regArr, PREG_SET_ORDER); // match all href =

foreach ($regArr as $i => $match) { // traverse all matches
    $href = $match[1];

    if (preg_match('|^[a-z][a-z0-9+.\-]*:|i', $href)) {
        // Already has a scheme (http://, https://, mailto:, ...): absolute link.
        $link = $href;
    } elseif (substr($href, 0, 2) === "//") {
        // Protocol-relative link: reuse the scheme of the page being scanned.
        $link = $scheme . ":" . $href;
    } elseif (substr($href, 0, 1) === "/") {
        // Relative to the root directory of the site.
        $link = $site . $href;
    } else {
        // Relative to the directory of the current page.
        $link = $base . $href;
    }

    // The link text comes from a remote page, so escape it before echoing it
    // into our own HTML output.
    echo "link" . ($i + 1) . ":" . htmlspecialchars($link, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "<br/>";
}
?>