Write a simple web crawler with jsoup and save the data crawled from the web page to a database (Java)

Source: Internet
Author: User
Tags getdate

Today I studied some basic usage of jsoup and would like to share it with you. I first put the data scraped from each entry on the page into an object, then put each object into a HashMap, and finally save the objects to the database through JDBC.

The site being crawled today is Zhaopin. Each recruitment posting can be viewed as an object, so you need a custom JavaBean class (kept in the javabeen package here).

Two JAR files are required: jsoup-1.10.3.jar and mysql-connector-java-5.1.39.jar.

Package javabeen;

import java.util.Date; // kept from the original listing; not referenced by this class

/**
 * Plain data holder for one recruitment posting.
 *
 * <p>Every attribute is stored as a {@code String}; the class performs no parsing or
 * validation of the values it is given.
 *
 * @author XML
 */
public class Job {
    private String position;       // position
    private String company;        // company name
    private String compensation;   // payroll
    private String workplace;      // work place
    private String date;           // release date
    private String education;      // education
    private String experience;     // work experience
    private String type;           // position category
    private String number;         // number of jobs
    private String jobdescription; // job description
    private String comdescription; // corporate description

    /**
     * Creates a posting with every attribute supplied.
     *
     * @param position the position
     * @param company the company name
     * @param compensation the payroll
     * @param workplace the work place
     * @param date the release date
     * @param education the education
     * @param experience the work experience
     * @param type the position category
     * @param number the number of jobs
     * @param jobdescription the job description
     * @param comdescription the corporate description
     */
    public Job(String position, String company, String compensation, String workplace,
            String date, String education, String experience, String type, String number,
            String jobdescription, String comdescription) {
        super();
        this.position = position;
        this.company = company;
        this.compensation = compensation;
        this.workplace = workplace;
        this.date = date;
        this.education = education;
        this.experience = experience;
        this.type = type;
        this.number = number;
        this.jobdescription = jobdescription;
        this.comdescription = comdescription;
    }

    /** Creates an empty posting; every attribute starts as {@code null}. */
    public Job() {
        super();
    }

    /**
     * @return the position
     */
    public String getPosition() {
        return position;
    }

    /**
     * @param position the position to set
     */
    public void setPosition(String position) {
        this.position = position;
    }

    /**
     * @return the company
     */
    public String getCompany() {
        return company;
    }

    /**
     * @param company the company to set
     */
    public void setCompany(String company) {
        this.company = company;
    }

    /**
     * @return the compensation
     */
    public String getCompensation() {
        return compensation;
    }

    /**
     * @param compensation the compensation to set
     */
    public void setCompensation(String compensation) {
        this.compensation = compensation;
    }

    /**
     * @return the workplace
     */
    public String getWorkplace() {
        return workplace;
    }

    /**
     * @param workplace the workplace to set
     */
    public void setWorkplace(String workplace) {
        this.workplace = workplace;
    }

    /**
     * @return the date
     */
    public String getDate() {
        return date;
    }

    /**
     * @param date the date to set
     */
    public void setDate(String date) {
        this.date = date;
    }

    /**
     * @return the education
     */
    public String getEducation() {
        return education;
    }

    /**
     * @param education the education to set
     */
    public void setEducation(String education) {
        this.education = education;
    }

    /**
     * @return the experience
     */
    public String getExperience() {
        return experience;
    }

    /**
     * @param experience the experience to set
     */
    public void setExperience(String experience) {
        this.experience = experience;
    }

    /**
     * @return the type
     */
    public String getType() {
        return type;
    }

    /**
     * @param type the type to set
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * @return the number
     */
    public String getNumber() {
        return number;
    }

    /**
     * @param number the number to set
     */
    public void setNumber(String number) {
        this.number = number;
    }

    /**
     * @return the jobdescription
     */
    public String getJobdescription() {
        return jobdescription;
    }

    /**
     * @param jobdescription the jobdescription to set
     */
    public void setJobdescription(String jobdescription) {
        this.jobdescription = jobdescription;
    }

    /**
     * @return the comdescription
     */
    public String getComdescription() {
        return comdescription;
    }

    /**
     * @param comdescription the comdescription to set
     */
    public void setComdescription(String comdescription) {
        this.comdescription = comdescription;
    }

    /**
     * Renders every attribute as {@code name=value} pairs inside {@code Job [...]}.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return "Job [position=" + position
                + ", company=" + company
                + ", compensation=" + compensation
                + ", workplace=" + workplace
                + ", date=" + date
                + ", education=" + education
                + ", experience=" + experience
                + ", type=" + type
                + ", number=" + number
                + ", jobdescription=" + jobdescription
                + ", comdescription=" + comdescription + "]";
    }

}
Next, we implement the crawler itself.

Package control;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javabeen.Job;


Import Utils.datautils; public class Spider {static String url = ' http://sou.zhaopin.com/jobs/searchresult.ashx?jl=%E9%83%91%E5%B7%9E&kw=
	Java&sm=0&p=3 ";
	static int i = 0;
        
        public static void Body1 () throws ioexception{Document doc = jsoup.connect (URL). get (); 	
Element element = Doc.select ("div#newlist_list_content_table").	
        SYSTEM.OUT.PRINTLN (Element);
		Further gets the object corresponding to the table element Elements Elements tables = element.select ("table");
		Hashmap<integer, Job>hmap = new Hashmap<integer, job> (); listiterator<element> listiter = tables.listiterator (1);
		while (Listiter.hasnext ()) {//System.out.println (Listiter.next ());
			
			Element table = Listiter.next ();
			Element link = table.select ("TR&GT;TD.ZWMC"). Select ("a").
			Element Link1 = Table.select ("TR&GT;TD.GSMC"). Select ("a").
			Element link2 = Table.select ("Tr>td.zwyx").
			Element Link3 = Table.select ("Tr>td.gzdd").
			Element Link4 = Table.select ("TR&GT;TD.GXSJ"). Select ("span").
			Element link5 = Table.select ("TR&GT;TD.GXSJ"). Select ("span").
			Job Job = new Job ();
			String url1=link.attr ("href"). toString ();
			Document Doc1 = Jsoup.connect (URL1). get ();
			
Element element1 = Doc1.select ("Ul#terminal-ul clearfix").
			System.out.println (element1+ "= = =");
			Link.text ();
			
			i++;
			Job.setposition (Link.text (). toString ());
			Job.setcompany (Link1.text (). toString ());
			Job.setcompensation (Link2.text (). toString ());
			Job.setworkplace (Link3.text (). toString ());
Job.setdate (Link4.text (). toString ());			Hmap.put (i, job);
		
		} set<integer> keys = Hmap.keyset ();
			for (Integer key:keys) {Job value = Hmap.get (key);
			Connection conn = null;
				try {PreparedStatement PS = null;
				conn = Datautils.getconnection ();
				String sql = INSERT INTO job (position,company,compensation,workplace,date) values (?,?,?,?,?);
				PS = conn.preparestatement (SQL);
				Ps.setstring (1, value.getposition ());
				Ps.setstring (2, Value.getcompany ());
				Ps.setstring (3, Value.getcompensation ());
				Ps.setstring (4, Value.getworkplace ());
				Ps.setstring (5, Value.getdate ());
				Ps.executeupdate ();
			Conn.close ();
				catch (SQLException e) {//TODO auto-generated catch block E.printstacktrace ();
			SYSTEM.OUT.PRINTLN ("Database access failed");
		} System.out.println (key+ "," +value.tostring ());
	} public static void Main (string[] args) throws IOException, ClassNotFoundException, SQLException {body1 ();
 }
}

To access the database, a small database utility class is used:

Package utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;


/**
 * JDBC helper that loads the MySQL driver once and hands out connections to the
 * database configured in the constants below.
 */
public class Datautils {
    /** Class name of the JDBC driver loaded by the static initializer. */
    public static final String Driver = "com.mysql.jdbc.Driver";
    /** JDBC URL passed to {@link DriverManager}. */
    public static final String URL = "jdbc:mysql://localhost:3306/nyb?useSSL=true";
    /** Database user name. */
    public static final String user = "root";
    // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
    /** Database password. */
    public static final String password = "123456";

    static {
        try {
            Class.forName(Driver);
        } catch (ClassNotFoundException e) {
            // Best effort, as in the original listing: report the missing driver and carry on;
            // getconnection() will then fail with an SQLException.
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection using {@link #URL}, {@link #user} and {@link #password}.
     * The caller is responsible for closing it.
     *
     * @return a newly opened connection
     * @throws SQLException if the connection cannot be established
     */
    public static Connection getconnection() throws SQLException {
        return DriverManager.getConnection(URL, user, password);
    }
}



Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.