Implementing data collection (web scraping) in ASP.NET

Source: Internet
Author: User
Tags foreach object insert sql net return string
This is a data-collection program I wrote myself; it gathers candidate information from a recruitment website. This is my first blog post, so please don't laugh if it is badly written.

First, create a database table whose columns correspond to the fields declared in the code below.

The following are the referenced contents:

/// <summary>
/// Form that scrapes candidate records from the listing pages of www.xcjob.cn
/// and stores each record in a local Access database (data.mdb).
/// </summary>
/// <remarks>
/// Flow: <see cref="button1_Click"/> walks the listing pages, <see cref="CJ"/>
/// pulls the candidate name and detail link out of each listing row,
/// <see cref="Content"/> reads the detail page into the static fields below,
/// and <see cref="Insert"/> writes those fields as one database row.
/// Everything runs synchronously on the UI thread.
/// </remarks>
public partial class Form2 : Form
{
    /// <summary>First and last listing page numbers to traverse (inclusive).</summary>
    private const int FirstPage = 1;
    private const int LastPage = 50;

    /// <summary>Listing page URL; the page number is appended.</summary>
    private const string ListUrl = "http://www.xcjob.cn/renli.asp?pageno=";

    /// <summary>Site root that detail-page paths are appended to.</summary>
    private const string SiteRoot = "http://www.xcjob.cn";

    /// <summary>
    /// Number of leading characters dropped from each href read out of the parsed
    /// document. Presumably this is the length of "about:blank", which the embedded
    /// browser prepends to relative links - TODO confirm against a live page.
    /// </summary>
    private const int HrefPrefixLength = 11;

    /// <summary>
    /// Offset of the height value inside the combined gender/height cell
    /// (the first character is the gender).
    /// </summary>
    private const int HeightOffset = 11;

    private const string ConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=data.mdb";

    // NOTE(review): the table and column names below are taken from the original
    // statement. They contain spaces, so they are bracketed for Jet SQL. The original
    // listed "work experience" twice, which Jet rejects; the second occurrence is
    // written as [work history] here. Confirm every name against the real table schema.
    private const string InsertSql =
        "INSERT INTO [talent information] ([name], [age], [sex], [height], [political appearance], " +
        "[ethnicity], [education], [marital status], [specialty], [work experience], " +
        "[in-service units], [in-job positions], [work history], [salary requirements], " +
        "[job nature], [job search intention], [specific positions], [expectations workplace], [other]) " +
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";

    /// <summary>
    /// Off-screen browser used only as an HTML parser. It is created on first use by
    /// <see cref="htmltr_content"/> and disposed when a collection run ends.
    /// </summary>
    private WebBrowser parserBrowser;

    public Form2()
    {
        InitializeComponent();
    }

    // Name
    public static string XM = "";
    // Age
    public static string nl = "";
    // Gender
    public static string XB = "";
    // Height
    public static string SG = "";
    // Political outlook
    public static string mm = "";
    // Ethnicity
    public static string MZ = "";
    // Degree
    public static string XL = "";
    // Marital status
    public static string HK = "";
    // Subject of study
    public static string ZY = "";
    // Work experience (detail row 5)
    public static string gzjy = "";
    // Current employer
    public static string zzdw = "";
    // Job title
    public static string zzzw = "";
    // Work experience (detail row 7); labelled identically to gzjy in the source
    public static string gzjl = "";
    // Salary required
    public static string YX = "";
    // Nature of work
    public static string gzxz = "";
    // Job intention
    public static string Qzyx = "";
    // Specific positions
    public static string jtzw = "";
    // Expected work location
    public static string qwgzd = "";
    // Education, language proficiency, technical expertise
    public static string QT = "";

    /// <summary>
    /// Runs the whole collection: visits every listing page, then reports completion.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the handler and label names assume the WinForms designer defaults
    /// (button1_Click / label1); confirm against Form2.Designer.cs.
    /// </remarks>
    private void button1_Click(object sender, EventArgs e)
    {
        label1.Text = "Collecting data ...";
        // The loop below blocks the UI thread, so force the label to repaint now.
        label1.Refresh();

        try
        {
            for (int i = FirstPage; i <= LastPage; i++)
            {
                CJ(ListUrl + i);
            }
        }
        finally
        {
            // Release the parser browser once the run is over.
            if (parserBrowser != null)
            {
                parserBrowser.Dispose();
                parserBrowser = null;
            }
        }

        label1.Text = "Congratulations on your collection!";
        MessageBox.Show("Congratulations on your collection!");
    }

    /// <summary>
    /// Collects one listing page: finds the result table, reads the candidate name and
    /// detail link from each row, then collects every detail page.
    /// </summary>
    /// <param name="Url">Absolute URL of the listing page.</param>
    private void CJ(string Url)
    {
        // Page source (HTML); empty when the download failed.
        string strwebcontent = YM(Url);

        // Narrow the page to the first <table> after the "keywords:" marker.
        // NOTE(review): the marker text is reproduced from the original code - confirm
        // it matches the live page.
        int ibodystart = IndexAfter(strwebcontent, "<body", 0);
        int markerPos = IndexAfter(strwebcontent, "keywords:", ibodystart);
        int itablestart = IndexAfter(strwebcontent, "<table", markerPos);
        int itableend = IndexAfter(strwebcontent, "</table>", itablestart);
        if (itableend < 0)
        {
            Report("CJ", "result table not found in " + Url);
            return;
        }

        string strweb = strwebcontent.Substring(itablestart, itableend - itablestart);

        HtmlElementCollection htmltr = htmltr_content(strweb, "tr");
        if (htmltr == null)
        {
            return;
        }

        // Copy names and links out first: Content() reuses the parser browser, which
        // replaces the document these row elements belong to.
        System.Collections.Generic.List<string> names = new System.Collections.Generic.List<string>();
        System.Collections.Generic.List<string> links = new System.Collections.Generic.List<string>();

        foreach (HtmlElement tr in htmltr)
        {
            HtmlElementCollection anchors = tr.GetElementsByTagName("a");
            if (anchors.Count == 0)
            {
                // Rows without a link carry no candidate; skip them.
                continue;
            }

            string href = anchors[0].GetAttribute("href");
            if (href == null || href.Length < HrefPrefixLength)
            {
                continue;
            }

            names.Add(anchors[0].InnerText);
            links.Add(SiteRoot + href.Substring(HrefPrefixLength));
        }

        for (int i = 0; i < links.Count; i++)
        {
            XM = names[i];
            Content(links[i]);
        }
    }

    /// <summary>
    /// Collects one detail page: fills the static record fields from the detail table
    /// and inserts the record. A table that cannot be read is skipped and reported.
    /// </summary>
    /// <param name="URL">Absolute URL of the detail page.</param>
    private void Content(string URL)
    {
        try
        {
            string strwebcontent = YM(URL);

            // Narrow the page to the first <table>...</table> after the "Browse Times" marker.
            // NOTE(review): the marker text is reproduced from the original code - confirm
            // it matches the live page.
            int ibodystart = IndexAfter(strwebcontent, "<body", 0);
            int markerPos = IndexAfter(strwebcontent, "Browse Times", ibodystart);
            int itablestart = IndexAfter(strwebcontent, "<table", markerPos);
            int itableend = IndexAfter(strwebcontent, "</table>", itablestart);
            if (itableend < 0)
            {
                Report("Content", "detail table not found in " + URL);
                return;
            }

            // + 8 keeps the closing "</table>" tag itself.
            string strweb = strwebcontent.Substring(itablestart, itableend - itablestart + 8);

            HtmlElementCollection tables = htmltr_content(strweb, "table");
            if (tables == null)
            {
                return;
            }

            foreach (HtmlElement table in tables)
            {
                try
                {
                    HtmlElementCollection rows = table.GetElementsByTagName("tr");

                    // Age
                    nl = Cell(rows, 1, 1);
                    // Gender and height share one cell
                    string xb_sg = Cell(rows, 1, 3);
                    XB = xb_sg.Substring(0, 1);
                    SG = xb_sg.Substring(HeightOffset);
                    // Political outlook
                    mm = Cell(rows, 2, 1);
                    // Ethnicity
                    MZ = Cell(rows, 2, 3);
                    // Degree
                    XL = Cell(rows, 3, 1);
                    // Marital status
                    HK = Cell(rows, 3, 3);
                    // Subject of study
                    ZY = Cell(rows, 5, 1);
                    // Work experience
                    gzjy = Cell(rows, 5, 3);
                    // Current employer
                    zzdw = Cell(rows, 6, 1);
                    // Job title
                    zzzw = Cell(rows, 6, 3);
                    // Work experience (row 7). The original stored this in gzjy again,
                    // overwriting row 5 and leaving gzjl always empty.
                    gzjl = Cell(rows, 7, 1);
                    // Salary required
                    YX = Cell(rows, 9, 1);
                    // Nature of work
                    gzxz = Cell(rows, 9, 3);
                    // Job intention
                    Qzyx = Cell(rows, 10, 1);
                    // Specific positions
                    jtzw = Cell(rows, 10, 3);
                    // Expected work location
                    qwgzd = Cell(rows, 11, 1);
                    // Education, language proficiency, technical expertise
                    QT = Cell(rows, 13, 1);

                    Insert();
                }
                catch (Exception ex)
                {
                    // Best effort: a table without the expected layout is skipped.
                    Report("Content", URL + ": " + ex.Message);
                }
            }
        }
        catch (Exception ex)
        {
            Report("Content", URL + ": " + ex.Message);
        }
    }

    /// <summary>
    /// Inserts the current values of the static record fields as one row.
    /// Failures are reported and otherwise ignored so one bad record does not stop the run.
    /// </summary>
    private void Insert()
    {
        try
        {
            using (OleDbConnection con = new OleDbConnection(ConnectionString))
            using (OleDbCommand com = new OleDbCommand(InsertSql, con))
            {
                // OleDb binds "?" placeholders by position, so the order here must
                // match the column list in InsertSql. The values are scraped web text,
                // so they are passed as parameters and never spliced into the SQL.
                com.Parameters.AddWithValue("@name", DbValue(XM));
                com.Parameters.AddWithValue("@age", AgeValue(nl));
                com.Parameters.AddWithValue("@sex", DbValue(XB));
                com.Parameters.AddWithValue("@height", DbValue(SG));
                com.Parameters.AddWithValue("@political", DbValue(mm));
                com.Parameters.AddWithValue("@ethnicity", DbValue(MZ));
                com.Parameters.AddWithValue("@education", DbValue(XL));
                com.Parameters.AddWithValue("@marital", DbValue(HK));
                com.Parameters.AddWithValue("@specialty", DbValue(ZY));
                com.Parameters.AddWithValue("@experience", DbValue(gzjy));
                com.Parameters.AddWithValue("@employer", DbValue(zzdw));
                com.Parameters.AddWithValue("@position", DbValue(zzzw));
                com.Parameters.AddWithValue("@history", DbValue(gzjl));
                com.Parameters.AddWithValue("@salary", DbValue(YX));
                com.Parameters.AddWithValue("@nature", DbValue(gzxz));
                com.Parameters.AddWithValue("@intention", DbValue(Qzyx));
                com.Parameters.AddWithValue("@specific", DbValue(jtzw));
                com.Parameters.AddWithValue("@workplace", DbValue(qwgzd));
                com.Parameters.AddWithValue("@other", DbValue(QT));

                con.Open();
                com.ExecuteNonQuery();
            }
        }
        catch (Exception ex)
        {
            Report("Insert", ex.Message);
        }
    }

    /// <summary>
    /// Parses an HTML fragment and returns every element with the given tag name.
    /// </summary>
    /// <param name="strweb">HTML fragment to parse.</param>
    /// <param name="TJ">Tag name to search for, e.g. "tr" or "table".</param>
    /// <returns>
    /// The matching elements, or null when the fragment could not be parsed. The
    /// elements belong to the shared parser document and are replaced by the next
    /// call, so callers must finish with them before calling this method again.
    /// </returns>
    private HtmlElementCollection htmltr_content(String strweb, String TJ)
    {
        try
        {
            if (parserBrowser == null)
            {
                // One browser for the whole run, instead of one undisposed browser per parse.
                parserBrowser = new WebBrowser();
                parserBrowser.Navigate("about:blank");
            }

            if (parserBrowser.Document == null)
            {
                Report("htmltr_content", "browser document is not available");
                return null;
            }

            // OpenNew(true) discards the previous content before the fragment is written.
            HtmlDocument htmldoc = parserBrowser.Document.OpenNew(true);
            htmldoc.Write(strweb);
            return htmldoc.GetElementsByTagName(TJ);
        }
        catch (Exception ex)
        {
            Report("htmltr_content", ex.Message);
            return null;
        }
    }

    /// <summary>
    /// Downloads a page and returns its source decoded as GB2312.
    /// </summary>
    /// <param name="Url">Absolute URL to fetch.</param>
    /// <returns>The page source, or an empty string when the request failed.</returns>
    private string YM(String Url)
    {
        string strresult = "";

        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
            request.Method = "GET";

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream streamreceive = response.GetResponseStream())
            using (StreamReader streamreader = new StreamReader(streamreceive, Encoding.GetEncoding("GB2312")))
            {
                strresult = streamreader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            Report("YM", Url + ": " + ex.Message);
        }

        return strresult;
    }

    /// <summary>
    /// Finds <paramref name="marker"/> in <paramref name="text"/> at or after
    /// <paramref name="from"/>. Returns -1 when it is missing or when an earlier
    /// search in the chain already failed (<paramref name="from"/> is negative).
    /// </summary>
    private static int IndexAfter(string text, string marker, int from)
    {
        if (text == null || from < 0)
        {
            return -1;
        }

        return text.IndexOf(marker, from, StringComparison.Ordinal);
    }

    /// <summary>Returns the text of cell <paramref name="cell"/> in row <paramref name="row"/>.</summary>
    private static string Cell(HtmlElementCollection rows, int row, int cell)
    {
        return rows[row].GetElementsByTagName("TD")[cell].InnerText;
    }

    /// <summary>Maps a null string to DBNull so it can be bound as a parameter.</summary>
    private static object DbValue(string value)
    {
        if (value == null)
        {
            return DBNull.Value;
        }

        return value;
    }

    /// <summary>
    /// Converts the scraped age text for binding. The original statement wrote the age
    /// unquoted, i.e. as a number, so an integer is bound when the text parses as one;
    /// otherwise the trimmed text is passed through unchanged.
    /// </summary>
    private static object AgeValue(string text)
    {
        if (text == null)
        {
            return DBNull.Value;
        }

        string trimmed = text.Trim();
        int age;
        if (int.TryParse(trimmed, System.Globalization.NumberStyles.Integer,
                         System.Globalization.CultureInfo.InvariantCulture, out age))
        {
            return age;
        }

        return trimmed;
    }

    /// <summary>Writes a diagnostic line for a step that failed and was skipped.</summary>
    private static void Report(string step, string message)
    {
        System.Diagnostics.Trace.WriteLine("Form2." + step + ": " + message);
    }
}

This program is not very well written: everything is collected by looping through the pages sequentially, so it is not very efficient. More experienced developers are welcome to suggest improvements, for example by using multithreading.



Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of the page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.