Java Implements crawler functionality

Source: Internet
Author: User

/**
* Crawl News information, encapsulated as entity Bean
*/
public class Getnews {
Public list<news> getnews () {
Storing News objects
list<news> list = new arraylist<news> ();
try {
Request DOM Document
Document document = Jsoup.connect ("http://baijia.baidu.com/"). get ();
Analytical
String selector = "H3>a";
Elements titlels = document.select (selector);

for (Element title:titlels) {
System.out.println ("Title---" + title.text ());
Request a tag again to get the content
String url = title.absurl ("href");
Document Document1 = jsoup.connect (URL). get ();
String selecttime = Document1.select ("Span[class=time]"). Text ();
SYSTEM.OUT.PRINTLN ("Time---" + selecttime);
String selectbody = Document1.select (
"Div[class=article-detail]"). Text ();
SYSTEM.OUT.PRINTLN ("Body---" + selectbody);
Make a News object join the list collection
News news = new News ();
News.settitle (Title.text ());
News.setbody (Selectbody);
News.setdate (Selecttime);
List.add (news);
}

} catch (IOException e) {
TODO auto-generated Catch block
E.printstacktrace ();
}

return list;
}

}

/*
* Save the received news object to the database
*/
public int Save (list<news> List) {

  //SQL prefix
  string sql = "INSERT INTO News (title,body,date) values";
  /*
   * This is the fastest way to insert a database
   */
  for (News news:list) {
 & Nbsp; sql = sql + "('" + news.gettitle () + "', '" + news.getbody () + "', '"
     + news.get Date () + "'),";
  }
  sql = sql.substring (0, Sql.length ()-1);
  system.out.println (SQL);
  int rows = basedao.executeupdate (SQL);
  return rows;
 }

/**
* Common tool classes for database connection
*
*/
public class Basedao {
Create need to get JDBC API
protected static Connection Connection = null;
protected static PreparedStatement PS = null;
protected static ResultSet rs = null;

01. Get a database connection
public static Boolean getconnection () {
/**
* Prerequisites for obtaining a 4-factor connection database for a database connection
*/
String Driver = Configmanager.getinstance (). GetValue ("Jdbc.driver");
String URL = configmanager.getinstance (). GetValue ("Jdbc.url");
String userName = Configmanager.getinstance (). GetValue ("Jdbc.username");
String password = configmanager.getinstance (). GetValue ("Jdbc.password");

  try {
   class.forname (driver);//Load driver
   connection = Drivermanager.getconnection (URL, userName, password);
  } catch (ClassNotFoundException e) {
   e.printstacktrace ();
   return false;
  } catch (SQLException e) {
   e.printstacktrace ();
   return false;
  }
  return true;
 }

/**
* 03. Change Executeupdate () returns an int represents the number of rows in the database that are affected by delete from user; Delete from
* User where id=? and name=?;
*/
public static int executeupdate (String sql, Object ... params) {
int rowNum = 0;
if (getconnection ()) {//Operation database affirms existing connection
try {
PS = connection.preparestatement (SQL);
Loop to the SQL statement? Placeholder Assignment
for (int i = 0; i < params.length; i++) {
Ps.setobject (i + 1, params[i]);
}
Execute SQL statement
RowNum = Ps.executeupdate ();
} catch (SQLException e) {
E.printstacktrace ();
} finally {
CloseConnection (); Close connection
}

}

return rowNum;
}

/**
* 04. Query ExecuteQuery () returns resultset select * from user; SELECT * FROM user
* Where id=? and name=?;
*/
public static ResultSet executeQuery (String sql, Object ... params) {
if (getconnection ()) {//Operation database affirms existing connection
try {
PS = connection.preparestatement (SQL);
Loop to the SQL statement? Placeholder Assignment
for (int i = 0; i < params.length; i++) {
Ps.setobject (i + 1, params[i]);
}
Execute SQL statement
rs = Ps.executequery ();
} catch (SQLException e) {
E.printstacktrace ();
}
}
Return RS;
}

02. Releasing Resources
public static Boolean CloseConnection () {
If none of the objects have been created? Can you close it? Non-null judgments must be made
if (rs! = null) {
try {
Rs.close ();
} catch (SQLException e) {
E.printstacktrace ();
return false;
}
}
if (PS! = null) {
try {
Ps.close ();
} catch (SQLException e) {
E.printstacktrace ();
return false;
}
}
if (connection! = null) {
try {
Connection.close ();
} catch (SQLException e) {
E.printstacktrace ();
return false;
}
}
return true;
}

}

/*
* input keyword query fuzzy query
  */
Public list<news> selectnews (String name) {
list<news> list = new arraylist<news> ();
String sql = "SELECT * from the news where title like?";
Object[] params = {"%" + name + "%"};
ResultSet rs = basedao.executequery (sql, params);
try {
Traversing result Sets
while (Rs.next ()) {
Create a News object
News news = new News ();
Get each column of each row
News.setid (Rs.getint ("id"));
News.settitle (rs.getstring ("title"));
News.setbody (rs.getstring ("body"));
News.setdate (rs.getstring ("date"));
List.add (news);
}
} catch (Exception e) {
Todo:handle exception
}
return list;
}

/**
 * Singleton utility class that reads the {@code Jdbc.properties} configuration
 * file from the classpath.
 */
public class ConfigManager {

    // Declared before the singleton so that it is already initialised when the
    // constructor below runs during class initialisation.
    private static final Properties properties = new Properties();

    // 01. The single instance, created eagerly.
    private static final ConfigManager manager = new ConfigManager();

    /**
     * 02. Private constructor: loads the configuration file. If the file is
     * missing or unreadable the problem is reported and the properties stay
     * empty, so lookups return {@code null} instead of class loading failing.
     */
    private ConfigManager() {
        // Classpath location of the configuration file.
        String path = "Jdbc.properties";
        InputStream stream = ConfigManager.class.getClassLoader()
                .getResourceAsStream(path);
        if (stream == null) {
            // getResourceAsStream returns null when the resource does not exist.
            System.err.println("Configuration file not found on classpath: " + path);
            return;
        }
        try {
            properties.load(stream);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * 03. Access point for the singleton.
     *
     * @return the single {@code ConfigManager} instance
     */
    public static synchronized ConfigManager getinstance() {
        return manager;
    }

    /**
     * Looks up a configuration value.
     *
     * @param key the property name
     * @return the configured value, or {@code null} if the key is not present
     */
    public static String GetValue(String key) {
        return properties.getProperty(key);
    }

}

/*

*properties file

*/

Jdbc.url=jdbc\:mysql\://localhost\:3306/test
Jdbc.username=hhr
Jdbc.password=hhr
Jdbc.driver=com.mysql.jdbc.Driver

Java Implements crawler functionality

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please email us, and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.