/// <summary>
/// Downloads the resource at <paramref name="Surl"/> and returns its text as one
/// string. Lines are concatenated without their line terminators.
/// </summary>
/// <param name="Surl">URL of the page to fetch.</param>
/// <returns>The response text with all lines joined; empty if the response has no lines.</returns>
private string getwebcontent (string Surl)
{
    WebRequest request = WebRequest.Create (Surl);
    System.Text.StringBuilder page = new System.Text.StringBuilder ();

    // Release the response, stream and reader deterministically, even when reading fails.
    using (WebResponse response = request.GetResponse ())
    using (Stream stream = response.GetResponseStream ())
    using (StreamReader reader = new StreamReader (stream))
    {
        string line;
        // Read inside the loop condition so every line, including the first, is appended.
        // The previous read-ahead loop fetched the first line and then overwrote it
        // before it was ever added to the result.
        while ((line = reader.ReadLine ()) != null)
        {
            page.Append (line);
        }
    }

    return page.ToString ();
}
/// <summary>
/// Returns the part of <paramref name="SRC"/> that lies between the last match of
/// <paramref name="startstr"/> and the last match of <paramref name="stopstr"/>.
/// </summary>
/// <param name="SRC">Text to search.</param>
/// <param name="startstr">
/// Regular-expression pattern marking the start; the result begins right after its
/// last match. If it never matches, the result begins at index 0.
/// </param>
/// <param name="stopstr">
/// Regular-expression pattern marking the end; the result stops where its last match
/// begins. The exact value <c>"Terminator "</c> is a sentinel meaning "up to the end
/// of <paramref name="SRC"/>".
/// </param>
/// <returns>
/// The text between the two markers, or an empty string when the stop marker is
/// missing or located before the end of the start marker.
/// </returns>
private string splitstr (string SRC, string startstr, string stopstr)
{
    // Both markers are used as regex patterns, so the offset must come from the
    // length of the matched text, not from the length of the pattern string.
    int startpos = 0;
    foreach (Match startmatch in Regex.Matches (SRC, startstr, RegexOptions.None))
    {
        startpos = startmatch.Index + startmatch.Length;
    }

    int stoppos;
    if (stopstr == "Terminator ")
    {
        // Sentinel: take everything up to the end of the source text.
        stoppos = SRC.Length;
    }
    else
    {
        stoppos = 0;
        foreach (Match stopmatch in Regex.Matches (SRC, stopstr, RegexOptions.None))
        {
            stoppos = stopmatch.Index;
        }

        // A stop marker that is missing or precedes the start yields an empty result
        // instead of a negative length.
        if (stoppos < startpos)
        {
            stoppos = startpos;
        }
    }

    return SRC.Substring (startpos, stoppos - startpos);
}
// Button click handler. For record ids 1..10 it downloads the content.jsp page built
// below via getwebcontent, renders it in an off-screen WebBrowser to obtain the body's
// plain text, extracts a fixed sequence of labelled fields with splitstr, and appends
// one comma-separated line per record to Textbox2.
// NOTE(review): this block looks damaged by text extraction/translation. `button#click`
// is not a valid identifier, keyword and type casing is off, and several locals are
// spelled with different casing between declaration and use (j/J, url/URL, Src/SRC,
// Desc/DESC). Confirm against the original source before relying on it.
Private void button#click (Object sender, eventargs E)
{
// Show the current time as a start marker.
// NOTE(review): in .NET custom format strings "mm" is minutes and "MM" is months, so
// "yymmddhhmmss " prints minutes in the month position. Presumably "yyMMddHHmmss" was
// intended; the trailing space inside the literal also looks accidental. Confirm.
String date1 = datetime. Now. tostring ("yymmddhhmmss ");
MessageBox. Show (date1 );
Int J;
String SRC;
String strwebcontent;
String DESC, Sr, SP;
Desc = "";
// Off-screen browser control, used only to turn the downloaded HTML into plain text.
Webbrowser webfda = new webbrowser ();
For (j = 1; j <= 10; j ++)
{
// The URL to be crawled; the record id is appended as the last query parameter.
// NOTE(review): the literal contains spaces around '?', '=', '&' and inside the
// percent-escapes, which looks like extraction damage. Verify the real URL.
String url = "http://app1.sfda.gov.cn/datasearch/face3/content.jsp? Tableid = 26 & tablename = table26 & tableview = % E5 % 9B % BD % E4 % Ba % A7 % E5 % 99% A8 % E6 % A2 % B0 & id = "+ J. tostring ();
// Obtain the source code of the specified URL
Strwebcontent = getwebcontent (URL );
// Show the id currently being processed, then load the downloaded HTML into a
// fresh document of the browser control.
Label1.text = J. tostring ();
Webfda. navigate ("about: blank ");
Htmldocument htmldoc = webfda. Document. opennew (true );
Htmldoc. Write (strwebcontent );
// Textbox1. Text = htmldoc. Body. innerhtml;
// Textbox2.text = htmldoc. Body. innertext;
// Work on the rendered text of the page body (markup removed). Each field below is
// taken as the text between its own label (Sr) and the label of the next field (SP);
// the extracted values are joined with commas into Desc.
Src = htmldoc. Body. innertext;
Sr = "production site ";
SP = "date of change ";//
Desc = splitstr (SRC, Sr, SP );
Sr = "date of change ";
SP = "Remarks ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "Remarks ";
SP = "Registration No ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "Registration No ";
SP = "Production Unit ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "Production Unit ";
SP = "Address ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "Address ";
SP = "zip code ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "zip code ";
SP = "Product Name ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "Product Name ";
SP = "product standard ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "product standard ";
SP = "product performance structure and composition ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "product performance structure and composition ";
SP = "validity period ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "validity period ";
SP = "Approval date ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "Approval date ";
SP = "Product applicability ";//
Desc = DESC + "," + splitstr (SRC, Sr, SP );
Sr = "Product applicability ";
// Last field: "Terminator " is the value splitstr checks for to read up to the end
// of the text instead of searching for a following label.
Sr = "type and model ";
SP = "Terminator ";//
// NOTE(review): "\ n " is presumably a damaged "\n" line break; as written it is not
// a valid C# escape sequence. Confirm.
Desc = DESC + "," + splitstr (SRC, Sr, SP) + "\ n ";
// Record the id just finished, append its line to the output box and reset the
// accumulator for the next record.
Label2.text = J. tostring ();
Textbox2.text = textbox2.text + DESC;
Desc = "";
}
}