Test software environment:
1. 16 GB RAM, Windows 7 x64, 32-core CPU.
2. JDK 1.7, Tomcat 6.x, Solr 4.8.
Database software environment:
1. 16 GB RAM, Windows 7 x64, 32-core CPU.
2. Oracle 11g.
Part one: Solr's default indexing tool, DIH (DataImportHandler).
Indexing 19 million records with Solr DIH takes about 45 minutes: roughly 6,500 records per second, or about 390,000 records per minute in total.
The JVM's maximum heap size is 4 GB, and the Solr index configuration uses the default parameters.
Solr DIH import:
Part two: indexing data with the SolrJ API.
Using the SolrJ API is slightly less efficient: about 300,000 records per minute in total, so the same 19 million records take about one hour.
The Solr server configuration is the same as above. The client machine reads the data from the database and indexes it using the SolrJ API. The code is as follows:
Import Java.io.ioexception;import java.sql.connection;import java.sql.drivermanager;import java.sql.ResultSet; Import Java.sql.sqlexception;import java.sql.statement;import Java.util.arraylist;import Java.util.HashMap;import Java.util.list;import Java.util.uuid;import Org.apache.solr.client.solrj.solrserver;import Org.apache.solr.client.solrj.solrserverexception;import Org.apache.solr.client.solrj.impl.httpsolrserver;import Org.springframework.context.applicationcontext;import Org.springframework.context.support.classpathxmlapplicationcontext;import org.springframework.util.StringUtils; Import Com.tianditu.search.v2.poi;public class Importpoi implements Ijobdef{private Solrserver server;private Datasourceconfig jdbcconfig;private solrconfig solrconfig;private poiimportconfig poiConfig;public DatasourceConfig Getjdbcconfig () {return jdbcconfig;} public void Setjdbcconfig (Datasourceconfig jdbcconfig) {this.jdbcconfig = Jdbcconfig;} Public Solrconfig Getsolrconfig () {return solrconfig;} Public void Setsolrconfig (Solrconfig solrconfig) {this.solrconfig = Solrconfig;} Public Poiimportconfig Getpoiconfig () {return poiconfig;} public void Setpoiconfig (Poiimportconfig poiconfig) {this.poiconfig = Poiconfig;} /** * @param args */public static void main (string[] args) {//TODO auto-generated method Stubapplicationcontext context = New Classpathxmlapplicationcontext ("App-spring.xml"); Importpoi Importtool = (importpoi) Context.getbean (" Importpoitool ") Importtool.submit (new Jobdonecallback () {public void OnCallback (Jobstatus status) {//TODO Auto-generated Method StubSystem.out.println (Status.getstatus ()); System.out.println (Status.getmessage ());}},new Jobtimer () {public void ontimeupdate (long timecost) {//TODO Auto-generated method StubSystem.out.println ("Solr commits once, is time-consuming from the beginning of the task:" +timecost/(1000*60) + "Minutes");});} Public Solrserver Getserver () {return server;} public void Setserver (Solrserver server) 
{this.server = server;} public boolean Importpoi (hashmap<string, object> params) {return false;} Private POI Getpoi (ResultSet rs) throws Sqlexception{poi poi = new poi ();p Oi.setid ((Uuid.randomuuid ()). ToString ()); Poi.setname (rs.getstring ("Nameforstore"));p oi.setaddress (rs.getstring ("Addressforstore")); String lat = rs.getstring ("lat"), if (Lat!=null&&!lat.equalsignorecase ("null") &&lat.length () >0) { Poi.setlat (double.valueof (LAT));} String lon = rs.getstring ("Lon"),//poi.setlon (Rs.getdouble ("Lon")), if (Lon!=null&&!lon.equalsignorecase (" Null ") &&lon.length () >0) {Poi.setlon (double.valueof (Lon));} Poi.setnid (rs.getstring ("DOCID")); String totalcity = rs.getstring ("totalcity"); Stringutils.isempty (totalcity)) {//---------citycodestring[] cities = Totalcity.split (""); List<string> cs = new arraylist<string> (); for (String c:cities) {cs.add (c);} Poi.setcities (CS);} String types = rs.getstring ("type"); Stringutils.isempty (Types)) {//type-----------------string[] TypeA = Types.split (""); list<string> t = new arraylist<string> (); for (StrinG C:typea) {T.add (c);} Poi.setcities (CS);p oi.settypes (t);} return poi;}; public void Submit (Jobdonecallback Callback,jobtimer timer) {if (solrconfig==null) {throw new IllegalArgumentException ( "SOLRJ is not configured correctly."); if (Jdbcconfig = = null) {throw new IllegalArgumentException ("JDBC is not configured correctly."); if (Poiconfig = = null) {throw new IllegalArgumentException ("The POI configuration file is not configured correctly."); Connection con = null; Statement PST = NULL; ResultSet rs = null; Solrserver SS = null; Jobstatus status = New Jobstatus () status.setname ("Importpoi"); Status.setstatus ("failure"); int i = 0;int c = 0;long start = System.currenttimemillis (); try {class.forname (Jdbcconfig.getdriverclass ()). 
newinstance (); con = Drivermanager.getconnection (Jdbcconfig.geturl (), Jdbcconfig.getusername (), Jdbcconfig.getpassword ()); int BatchSize = integer.valueof (Poiconfig.getimportrecordsize ()); ss = new Httpsolrserver (Solrconfig.getsolrurl ()); if ( Poiconfig.isdeleteonstartup ()) {ss.deletebyquery ("*:*"); Ss.commit ();} if (Jdbcconfig.getdriverclass (). ToString (). Contains ("MySQL") {//mysqlpst = (com.mysql.jdbc.Statement) con.createstatement (resultset.fetch_forward,resultset.concur_read_only );p st.setfetchsize (1);((com.mysql.jdbc.Statement) PST). Enablestreamingresults ();} Else{pst = Con.createstatement ();} rs = Pst.executequery (Poiconfig.getimportsql ()); POI p = null; list<poi> POIs = new arraylist<poi> (), while (Rs.next ()) {p = Getpoi (RS);//ss.addbean (P);p Ois.add (p); if (i >=batchsize) {Long Committ = System.currenttimemillis ();//system.out.println ("Elapsed Time:" + (Committ-start)/1000*60+ "minutes Timer.ontimeupdate (Committ-start),//system.out.println ("Submit Once"), Ss.addbeans (POIs); Ss.commit ();p ois.clear () ; c++;i=0;} else{i++;}} Ss.addbeans (POIs); Ss.commit (); Long end = System.currenttimemillis (); Status.setstatus ("Success"); Status.setmessage ("Processing success, Total time:" + (End-start)/1000*60+ "minutes"); Status.settimecost ((End-start)/1000*60);} catch (SQLException e) {//TODO auto-generated catch Block//e.printstacktrace (); Status.setmessage (e.tostring ());} catch (ClassNotFoundException e) {//TODO AUto-generated catch Block//e.printstacktrace (); Status.setmessage (e.tostring ());} catch (Instantiationexception e) {//TODO auto-generated catch Block//e.printstacktrace (); Status.setmessage ( E.tostring ());} catch (Illegalaccessexception e) {//TODO auto-generated catch Block//e.printstacktrace (); Status.setmessage ( E.tostring ());} catch (Solrserverexception e) {//TODO auto-generated catch Block//e.printstacktrace (); Status.setmessage (E.tostring () );} catch (IOException e) {//TODO auto-generated catch 
Block//e.printstacktrace (); Status.setmessage (e.tostring ());} Finally{try {if (rs!=null) {Rs.close ()}} catch (SQLException e) {//TODO auto-generated catch Blocke.printstacktrace ();} try {if (pst!=null) Pst.close ();} catch (SQLException e) {//TODO auto-generated catch Blocke.printstacktrace ();} try {if (con!=null) Con.close ();} catch (SQLException e) {//TODO auto-generated catch Blocke.printstacktrace ();} if (callback!=null) {callback.oncallback (status);}} return false;};}
The whole process is: read the database, convert each row into a DTO, send the DTOs to the Solr server via SolrServer.addBeans, and then call SolrServer.commit to commit the index (after which the results can be queried).
The code that reads and converts each row is as follows:
Private POI Getpoi (ResultSet rs) throws Sqlexception{poi poi = new poi ();p Oi.setid ((Uuid.randomuuid ()). ToString ()); Poi.setname (rs.getstring ("Nameforstore"));p oi.setaddress (rs.getstring ("Addressforstore")); String lat = rs.getstring ("lat"), if (Lat!=null&&!lat.equalsignorecase ("null") &&lat.length () >0) { Poi.setlat (double.valueof (LAT));} String lon = rs.getstring ("Lon"),//poi.setlon (Rs.getdouble ("Lon")), if (Lon!=null&&!lon.equalsignorecase (" Null ") &&lon.length () >0) {Poi.setlon (double.valueof (Lon));} Poi.setnid (rs.getstring ("DOCID")); String totalcity = rs.getstring ("totalcity"); Stringutils.isempty (totalcity)) {//---------citycodestring[] cities = Totalcity.split (""); List<string> cs = new arraylist<string> (); for (String c:cities) {cs.add (c);} Poi.setcities (CS);} String types = rs.getstring ("type"); Stringutils.isempty (Types)) {//type-----------------string[] TypeA = Types.split (""); list<string> t = new arraylist<string> (); for (String C: TypeA) {t.add (c);} Poi.setcities (CS);p oi.settypes (t);} return poi;};
SolrJ indexing code:
list<poi> POIs = new arraylist<poi> (), while (Rs.next ()) {//traverse jdbc Resultsetp = Getpoi (RS);//ss.addbean (P); Pois.add (P); if (i>=batchsize) {//Quantitative bulk index logic long Committ = System.currenttimemillis ();//system.out.println ("Elapsed Time:" + ( Committ-start)/1000*60+ "minutes"); Timer.ontimeupdate ((Committ-start));//system.out.println ("Submit Once") Ss.addbeans (POIs );//To Solrserverss.commit ();p ois.clear (); c++;i=0;} else{i++;}} Ss.addbeans (POIs);//Make Final submission ss.commit ();
Analysis:
1. Where does the performance difference mainly come from?
A: The main difference is that in scheme one (DIH), once the program has fetched the data it calls Solr's internal UpdateHandler directly and puts the data into the index, whereas scheme two indexes through SolrJ and therefore incurs an extra round of network I/O. In addition, in scheme two the data is first converted to a DTO, SolrJ then converts the DTO into a SolrInputDocument object, and the SolrInputDocument is in turn converted into the string required by Solr's REST interface; these repeated conversions also cost performance. (Note: calling SolrJ's addBeans to import in batches is itself a performance measure; submitting documents one at a time would be slower because it issues more HTTP requests.)
2. How can it be optimized?
A: The analysis for question one is also the answer to question two. The main cost lies in the many data-entity conversions, so the main guidelines are: 1. Use the simplest possible API calls: convert the ResultSet directly into SolrInputDocument objects to reduce data conversion. 2. Use arrays or similar data structures in place of the current List<bean>.
Comparative analysis of SolrJ API indexing efficiency