I first needed this function while I was building a search engine.
x1 is the HTTP address of the current document, and x2 is the address of a link found on that page.
Code
/// <summary>
/// Converts a link found on a page into an absolute URL.
/// </summary>
/// <param name="x1">
/// Address of the document that contains the link. If it does not start
/// with "http://" or "https://", "http://" is prepended.
/// </param>
/// <param name="x2">
/// The link as written in the page: an absolute http(s) URL, a root-relative
/// path ("/a.html"), a parent-relative path ("../a.html", any depth), or a
/// path relative to the current document ("a.html").
/// </param>
/// <returns>
/// <paramref name="x2"/> unchanged when it is already an absolute http(s) URL;
/// otherwise the link resolved against <paramref name="x1"/>, in the canonical
/// escaped form produced by <see cref="System.Uri.AbsoluteUri"/>. If either
/// value cannot be parsed as a URI, <paramref name="x2"/> is returned unchanged.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="x1"/> or <paramref name="x2"/> is null.
/// </exception>
public string GetUrl(string x1, string x2)
{
    if (x1 == null)
    {
        throw new System.ArgumentNullException("x1");
    }

    if (x2 == null)
    {
        throw new System.ArgumentNullException("x2");
    }

    // Only a link that *starts* with a scheme is absolute; a link that merely
    // contains "http://" (for example inside a query string) is still relative.
    bool linkIsAbsolute =
        x2.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
        x2.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    if (linkIsAbsolute)
    {
        return x2;
    }

    // The document address may be given without a scheme ("www.example.com/a").
    bool baseHasScheme =
        x1.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
        x1.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    if (!baseHasScheme)
    {
        x1 = "http://" + x1;
    }

    // System.Uri implements standard relative-reference resolution, which
    // covers "/x", "../x" (any depth), "x", and a base that has no path.
    System.Uri baseUri;
    System.Uri resolved;
    if (System.Uri.TryCreate(x1, System.UriKind.Absolute, out baseUri) &&
        System.Uri.TryCreate(baseUri, x2, out resolved))
    {
        return resolved.AbsoluteUri;
    }

    // Unparseable input: hand the link back rather than inventing an address.
    return x2;
}
Cases such as "http://www.cnblogs.com/" are not examined any further; this is generally not needed.
The function then returns the absolute address of the link.
To obtain all links on the page, you can use the following code:
Code
// Pattern for href/src attributes. Two alternatives, tried in order:
//   1. a value wrapped in double or single quotes (no quote, '#' or '>' inside);
//   2. any other value, which runs until whitespace or the closing '>' of the
//      tag, so it never swallows the markup that follows the attribute.
string strRegex = @"((href|HREF|src|SRC)[ ]*=[ ]*[""'][^""'#>]+[""'])|((href|HREF|src|SRC)[ ]*=[ ]*[^\s>]+)";
// Earlier, quoted-values-only version of the pattern:
// @"(href|HREF|src|SRC)[ ]*=[ ]*[""'][^""'#>]+[""']";
Regex r = new Regex(strRegex, RegexOptions.IgnoreCase);
// htmlpage holds the HTML text to scan; each match is one attribute=value pair.
MatchCollection m = r.Matches(htmlpage);
This captures every href and src link, whether the value is double-quoted, single-quoted, or unquoted. The expression was tested with mtracer.exe.