// Prerequisite: create the database table with the fields listed below before running this program.
// Note: this program is not well optimized. Every page is fetched sequentially inside a for loop, so it is slow; suggestions for a multithreaded version are welcome.
// Form that scrapes person records from www.xcjob.cn and writes each one as a
// row into the Access database data.mdb.
//
// Flow: the button handler walks the list pages -> CJ extracts each person's
// name and detail URL -> content parses the detail page into the static fields
// below -> insert writes those fields as one row.
//
// NOTE(review): the source this was restored from had its identifier casing
// mangled. "Form2", "label1" and "button1_Click" follow the WinForms designer
// defaults; confirm them against the designer half of this partial class.
public partial class Form2 : Form
{
    // Number of list pages walked by the button handler.
    private const int LastListPage = 50;

    // Length of the literal "</table>", added so a slice includes the closing tag.
    private const int CloseTableTagLength = 8;

    // Number of leading characters dropped from a scraped href before the site
    // root is prepended. NOTE(review): 11 is the length of "about:blank", which
    // is presumably what the parsing browser prefixes to relative links - confirm.
    private const int HrefPrefixLength = 11;

    // Single browser reused for all HTML parsing (see htmltr_content). The
    // previous code created a new WebBrowser per parse and never disposed it.
    private WebBrowser parser_browser;

    public Form2()
    {
        InitializeComponent();

        // Release the parsing browser together with the form.
        Disposed += delegate
        {
            if (parser_browser != null)
            {
                parser_browser.Dispose();
                parser_browser = null;
            }
        };
    }

    // ---- Fields of the record currently being processed ----
    // Name
    public static string XM = "";
    // Age
    public static string NL = "";
    // Gender
    public static string XB = "";
    // Height
    public static string SG = "";
    // Political outlook
    public static string Mm = "";
    // Nationality
    public static string MZ = "";
    // Education level
    public static string XL = "";
    // Marital status
    public static string HK = "";
    // Major
    public static string ZY = "";
    // Work experience
    public static string gzjy = "";
    // Employer
    public static string zzdw = "";
    // Position
    public static string zzzw = "";
    // Work history. NOTE(review): the original comment was identical to the one
    // on gzjy; presumably this is the detailed employment record - confirm.
    public static string gzjl = "";
    // Required monthly salary
    public static string YX = "";
    // Nature of work
    public static string gzxz = "";
    // Job-seeking intention
    public static string qzyx = "";
    // Specific position
    public static string jtzw = "";
    // Expected place of work
    public static string qwgzd = "";
    // Education, language level, technical expertise
    public static string Qt = "";

    // Click handler: walks list pages 1..LastListPage and collects every record.
    // Runs synchronously on the UI thread, so the form is unresponsive until done.
    private void button1_Click(object sender, EventArgs e)
    {
        label1.Text = "collecting data ...... ";
        // Force a repaint; otherwise the text above is never shown because the
        // loop below blocks the message pump.
        label1.Refresh();

        for (int page = 1; page <= LastListPage; page++)
        {
            CJ("http://www.xcjob.cn/renli.asp?Pageno=" + page);
        }

        label1.Text = "Congratulations! collection is complete! ";
        MessageBox.Show("Congratulations! collection is complete! ");
    }

    // Collects one list page: finds the first table after the "Keyword:" marker,
    // and for every row that contains a link, stores the link text in XM and
    // processes the linked detail page via content().
    // Pages that cannot be downloaded or do not have the expected layout are skipped.
    private void CJ(string URL)
    {
        string page = ym(URL);
        if (string.IsNullOrEmpty(page))
        {
            return;
        }

        // NOTE(review): "Keyword:" is reproduced from the source, which appears to
        // be machine-translated; confirm it matches the text on the live page.
        int bodyStart = page.IndexOf("<body", StringComparison.OrdinalIgnoreCase);
        if (bodyStart < 0)
        {
            return;
        }
        int markerStart = page.IndexOf("Keyword:", bodyStart);
        if (markerStart < 0)
        {
            return;
        }
        int tableStart = page.IndexOf("<table", markerStart, StringComparison.OrdinalIgnoreCase);
        if (tableStart < 0)
        {
            return;
        }
        int tableEnd = page.IndexOf("</table>", tableStart, StringComparison.OrdinalIgnoreCase);
        if (tableEnd < 0)
        {
            return;
        }

        string fragment = page.Substring(tableStart, tableEnd - tableStart);
        HtmlElementCollection rows = htmltr_content(fragment, "TR");
        if (rows == null)
        {
            return;
        }

        // Snapshot (name, url) pairs first: content() re-uses the same parsing
        // browser, which replaces the document that "rows" belongs to.
        System.Collections.Generic.List<System.Collections.Generic.KeyValuePair<string, string>> people =
            new System.Collections.Generic.List<System.Collections.Generic.KeyValuePair<string, string>>();
        foreach (HtmlElement row in rows)
        {
            try
            {
                HtmlElementCollection links = row.GetElementsByTagName("A");
                if (links.Count == 0)
                {
                    continue; // header or spacer row without a person link
                }

                string href = links[0].GetAttribute("href");
                if (href == null || href.Length <= HrefPrefixLength)
                {
                    continue;
                }

                string detailUrl = "http://www.xcjob.cn" + href.Substring(HrefPrefixLength);
                people.Add(new System.Collections.Generic.KeyValuePair<string, string>(
                    links[0].InnerText ?? "", detailUrl));
            }
            catch (Exception ex)
            {
                log_failure("CJ: reading list row", ex);
            }
        }

        foreach (System.Collections.Generic.KeyValuePair<string, string> person in people)
        {
            XM = person.Key;
            content(person.Value);
        }
    }

    // Collects one detail page: finds the first table after the "browsing Times"
    // marker, copies fixed (row, cell) positions of every table in that fragment
    // into the static record fields, and calls insert() for each table read
    // successfully. A table that lacks the expected cells is logged and skipped.
    private void content(string URL)
    {
        string page = ym(URL);
        if (string.IsNullOrEmpty(page))
        {
            return;
        }

        // NOTE(review): "browsing Times" is reproduced from the source, which appears
        // to be machine-translated; confirm it matches the text on the live page.
        int bodyStart = page.IndexOf("<body", StringComparison.OrdinalIgnoreCase);
        if (bodyStart < 0)
        {
            return;
        }
        int markerStart = page.IndexOf("browsing Times", bodyStart);
        if (markerStart < 0)
        {
            return;
        }
        int tableStart = page.IndexOf("<table", markerStart, StringComparison.OrdinalIgnoreCase);
        if (tableStart < 0)
        {
            return;
        }
        int tableEnd = page.IndexOf("</table>", tableStart, StringComparison.OrdinalIgnoreCase);
        if (tableEnd < 0)
        {
            return;
        }

        string fragment = page.Substring(tableStart, tableEnd - tableStart + CloseTableTagLength);
        HtmlElementCollection tables = htmltr_content(fragment, "table");
        if (tables == null)
        {
            return;
        }

        foreach (HtmlElement table in tables)
        {
            try
            {
                // Looked up once per table instead of once per field.
                HtmlElementCollection rows = table.GetElementsByTagName("TR");

                NL = cell_text(rows, 1, 1);

                // Gender and height share one cell: the first character is the
                // gender, everything from offset 11 on is the height.
                // NOTE(review): offset 11 is taken from the source as-is; confirm
                // against the live page layout.
                string genderAndHeight = cell_text(rows, 1, 3);
                XB = genderAndHeight.Substring(0, 1);
                SG = genderAndHeight.Substring(11);

                Mm = cell_text(rows, 2, 1);
                MZ = cell_text(rows, 2, 3);
                XL = cell_text(rows, 3, 1);
                HK = cell_text(rows, 3, 3);
                ZY = cell_text(rows, 5, 1);
                gzjy = cell_text(rows, 5, 3);
                zzdw = cell_text(rows, 6, 1);
                zzzw = cell_text(rows, 6, 3);
                // Previously this value was written to gzjy, overwriting the
                // value read from row 5 and leaving gzjl permanently empty.
                gzjl = cell_text(rows, 7, 1);
                YX = cell_text(rows, 9, 1);
                gzxz = cell_text(rows, 9, 3);
                qzyx = cell_text(rows, 10, 1);
                jtzw = cell_text(rows, 10, 3);
                qwgzd = cell_text(rows, 11, 1);
                Qt = cell_text(rows, 13, 1);

                insert();
            }
            catch (Exception ex)
            {
                // Typically a table (e.g. a nested one) without the expected cells.
                log_failure("content: reading detail table", ex);
            }
        }
    }

    // Returns the inner text of the TD at index "cell" among the TD elements under
    // the TR at index "row" ("" when the cell has no text). Throws when either
    // index is out of range; callers treat that as "not a record table".
    private static string cell_text(HtmlElementCollection rows, int row, int cell)
    {
        return rows[row].GetElementsByTagName("TD")[cell].InnerText ?? "";
    }

    // Inserts the current record (the static fields) as one row.
    // Values are passed as parameters rather than concatenated into the SQL text,
    // because they are scraped from a web page and may contain quotes.
    // Failures are logged and the record is dropped; nothing is thrown.
    private void insert()
    {
        const string connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=data.mdb";

        // NOTE(review): the table and column names are reproduced from the source,
        // which appears to be a machine translation of the real schema. They are
        // bracketed because they contain spaces. "work experience" is listed twice
        // (once for gzjy, once for gzjl); Jet will reject the duplicate, so replace
        // these with the actual column names of data.mdb.
        const string sql =
            "insert into [talent information] (" +
            "[name], [age], [gender], [height], [political outlook], [ethnicity], [education], " +
            "[marital status], [Major], [work experience], [in-service organization], " +
            "[in-service position], [work experience], [requirements for monthly salary], " +
            "[job nature], [job intention], [specific job title], [desired job location], [other]" +
            ") values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

        // Age was previously inserted unquoted, i.e. as a number. Text that is
        // not a whole number is stored as NULL instead of failing the insert.
        int parsedAge;
        object age = int.TryParse(NL, out parsedAge) ? (object)parsedAge : DBNull.Value;

        // Order must match the column list above (OleDb parameters are positional).
        object[] values =
        {
            XM, age, XB, SG, Mm, MZ, XL, HK, ZY, gzjy, zzdw, zzzw,
            gzjl, YX, gzxz, qzyx, jtzw, qwgzd, Qt
        };

        try
        {
            using (OleDbConnection con = new OleDbConnection(connectionString))
            using (OleDbCommand com = new OleDbCommand(sql, con))
            {
                foreach (object value in values)
                {
                    com.Parameters.AddWithValue("?", value ?? "");
                }

                con.Open();
                com.ExecuteNonQuery();
            }
        }
        catch (Exception ex)
        {
            log_failure("insert: writing record for '" + XM + "'", ex);
        }
    }

    // Parses an HTML fragment and returns all elements with tag name TJ,
    // or null when parsing fails.
    // The returned collection belongs to the shared parsing browser's document
    // and is only valid until the next call to this method.
    private HtmlElementCollection htmltr_content(string strweb, string TJ)
    {
        try
        {
            if (parser_browser == null)
            {
                parser_browser = new WebBrowser();
                parser_browser.Navigate("about:blank");
            }

            // OpenNew replaces the browser's current document with an empty one
            // that the fragment is then written into.
            HtmlDocument doc = parser_browser.Document.OpenNew(true);
            doc.Write(strweb);
            return doc.GetElementsByTagName(TJ);
        }
        catch (Exception ex)
        {
            log_failure("htmltr_content: parsing fragment", ex);
            return null;
        }
    }

    // Downloads URL with an HTTP GET and returns the body decoded as gb2312.
    // Returns "" when the request fails for any reason.
    private string ym(string URL)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
            request.Method = "GET";

            // "using" releases the connection even when reading fails part-way.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding("gb2312")))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            log_failure("ym: downloading " + URL, ex);
            return "";
        }
    }

    // Records a non-fatal failure in the trace output so skipped pages and
    // records are visible instead of silently disappearing.
    private static void log_failure(string context, Exception ex)
    {
        System.Diagnostics.Trace.WriteLine(context + ": " + ex.Message);
    }
}
// Note: this program is not well optimized. Every page is fetched sequentially inside a for loop, so throughput is low; suggestions on how to parallelize it with multiple threads are welcome.