/*
* Name: Step by Step delivery network Spider (1)
*
* Version: V1.0
*
* Author: Zhang Shuangxi
*
* Date: 2010.10.17
*
* Function: Find a valid URL from a string (correct URL in HTML syntax expression)
*
* Process Design:
* Filter URLs Based on HTML syntax rules
* 1. function: my_strncmp (char * p, char * q, int n)
* Function: Simulate and implement the library function strncmp.
*
* 2. function: judge_mark (char ** P)
* Function: determines whether the text begins with the anchor tag "<a". If yes, perform the next step;
* If not, NULL is returned;
*
* 3. function: judge_href (char ** P)
* Function: determines whether it is "href". If yes, perform the next step;
* If not, null is returned;
*
* 4. function: judge_equal_mark (char ** P)
* Function: determines whether it is "=". If yes, perform the next step;
* If not, null is returned;
*
* 5. function: judge_http (char ** P)
* Function: determines whether it is "HTTP". If yes, perform the next step;
* If not, null is returned;
*
* 6. function: Link (char ** P)
* Function: Call functions 2, 3, 4, and 5 to find the correct URL.
*
* 7. function: printf_link (char * p)
* Function: print the URL
* Note: printing must stop when the end of the URL is reached; otherwise trailing markup such as "> ... </a>" would be printed together with it.
*
**/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * my_strncmp - hand-written stand-in for the library function strncmp().
 *
 * Compares at most n characters of p and q. The scan stops at the first
 * differing character, or as soon as both strings end, so neither string
 * is ever read past its terminating NUL.
 *
 * p, q: NUL-terminated strings; both must be non-NULL.
 * n:    maximum number of characters to compare; n <= 0 compares nothing.
 *
 * Returns 0 when the compared prefixes are equal and 1 otherwise. Unlike
 * strncmp(), the result carries no ordering information.
 */
int my_strncmp(const char *p, const char *q, int n)
{
    for (int i = 0; i < n; i++) {
        if (p[i] != q[i]) {
            return 1;
        }
        /* Equal and both at the terminator: the strings match in full. */
        if (p[i] == '\0') {
            return 0;
        }
    }
    return 0;
}
/*
 * judge_mark - check that the text at *p opens an anchor tag.
 *
 * Leading spaces are skipped by advancing *p. The text must then start
 * with "<a" followed by a space: three characters, matching the three
 * positions that are consumed on success. Comparing three characters
 * against the bare two-character literal "<a" would drag the literal's
 * terminator into the match.
 *
 * p: address of the scan pointer. On success *p is moved just past "<a ".
 *    On failure *p is left at the first non-space character.
 *
 * Returns the updated *p on success, NULL when p or *p is NULL or the tag
 * is not present.
 */
char *judge_mark(char **p)
{
    if (p == NULL || *p == NULL) {
        return NULL;
    }

    while (**p == ' ') {
        (*p)++;
    }

    /* strncmp() stops at a terminator, so short input is safe. */
    if (strncmp(*p, "<a ", 3) != 0) {
        return NULL;
    }

    *p += 3;
    return *p;
}
/*
 * judge_href - check that the text at *p continues with "href".
 *
 * Leading spaces are skipped by advancing *p, then the next four
 * characters are compared with "href".
 *
 * p: address of the scan pointer. On success *p is moved just past
 *    "href". On failure *p is left at the first non-space character.
 *
 * Returns the updated *p on success, NULL when p or *p is NULL or the
 * attribute name is not present.
 */
char *judge_href(char **p)
{
    if (p == NULL || *p == NULL) {
        return NULL;
    }

    while (**p == ' ') {
        (*p)++;
    }

    /* strncmp() stops at a terminator, so short input is safe. */
    if (strncmp(*p, "href", 4) != 0) {
        return NULL;
    }

    *p += 4;
    return *p;
}
/*
 * judge_equal_mark - check that the text at *p continues with '='.
 *
 * Leading spaces are skipped by advancing *p.
 *
 * p: address of the scan pointer. After the call *p rests on the first
 *    non-space character; it is NOT moved past the '=' itself.
 *
 * Returns a pointer to the character following '=' on success, NULL when
 * p or *p is NULL or the next non-space character is not '='.
 */
char *judge_equal_mark(char **p)
{
    if (p == NULL || *p == NULL) {
        return NULL;
    }

    while (**p == ' ') {
        (*p)++;
    }

    return (**p == '=') ? *p + 1 : NULL;
}
/*
 * judge_http - check that the attribute value at *p starts with "http".
 *
 * Spaces and opening quotes (double or single) are skipped by advancing
 * *p. The following four characters must then spell the scheme prefix
 * "http". The comparison ignores letter case, since URI schemes are
 * case-insensitive, and it stops at the first mismatch, so a short string
 * is never read past its terminator.
 *
 * p: address of the scan pointer. After the call *p rests on the first
 *    character that is neither a space nor a quote.
 *
 * Returns *p (the start of the URL) on success, NULL when p or *p is NULL
 * or the value does not begin with "http".
 */
char *judge_http(char **p)
{
    static const char scheme[] = "http";

    if (p == NULL || *p == NULL) {
        return NULL;
    }

    while (**p == ' ' || **p == '"' || **p == '\'') {
        (*p)++;
    }

    for (size_t i = 0; scheme[i] != '\0'; i++) {
        /* A NUL in the input never equals a scheme letter, so we bail out
         * before indexing beyond the end of the string. */
        if (tolower((unsigned char)(*p)[i]) != scheme[i]) {
            return NULL;
        }
    }

    return *p;
}
/*
 * Link - run the four checks in sequence to locate a URL.
 *
 * Calls judge_mark(), judge_href(), judge_equal_mark() and judge_http()
 * in that order. Each stage runs only if the previous one returned a
 * non-NULL pointer.
 *
 * p: address of the scan pointer. It is handed directly to judge_mark(),
 *    so any movement that function applies to *p is visible to the
 *    caller. The later stages operate on a local copy.
 *
 * Returns the result of the last stage that ran: the pointer produced by
 * judge_http() when every stage succeeds, NULL as soon as one fails or
 * when p is NULL.
 */
char *Link(char **p)
{
    char *ret;

    if (p == NULL) {
        return NULL;
    }

    ret = judge_mark(p);
    if (ret != NULL) {
        ret = judge_href(&ret);
    }
    if (ret != NULL) {
        ret = judge_equal_mark(&ret);
    }
    if (ret != NULL) {
        ret = judge_http(&ret);
    }
    return ret;
}
/*
 * printf_link - print the URL that starts at p, followed by a newline.
 *
 * Characters are written to stdout one at a time until a delimiter is
 * met: a double quote, a single quote, '>', a newline, or the end of the
 * string. Stopping there keeps trailing markup out of the output.
 *
 * p: start of the URL text; when NULL nothing at all is printed.
 */
void printf_link(char *p)
{
    if (p == NULL) {
        return;
    }

    for (; *p != '"' && *p != '\'' && *p != '>' && *p != '\0' && *p != '\n'; p++) {
        printf("%c", *p);
    }
    printf("\n");
}
/*
 * main - run the URL finder over four sample anchor strings and print
 * whatever each one yields.
 *
 * The command-line arguments are not used. Always returns 0.
 */
int main(int argc, char *argv[])
{
    /* Embedded double quotes must be escaped as \" or they would end the
     * string literal early. */
    char *samples[] = {
        "<a href =\" http://www.akaedu.org\"> OK </a> ",
        "<a href = http://www.akaedu.org ",
        "<a href = http://www.akaedu.org ",
        "<a href =\" http://www.akaedu.\"org ",
    };
    size_t count = sizeof samples / sizeof samples[0];

    (void)argc;
    (void)argv;

    for (size_t i = 0; i < count; i++) {
        /* Link() may move the pointer it is given, so pass the array slot. */
        char *ret = Link(&samples[i]);
        printf_link(ret);
    }
    return 0;
}