First write-How to efficiently parse large XML files with sax

Source: Internet
Author: User
Tags polyline return tag
Recently used an XML file to read it and then save it to the database. But there are some ways to find out on the Internet that the problem is not that the file is too big to cause the heap to overflow, that the efficiency is too low, millions of data will take several hours or even one or two days, so I have to figure out that I wrote one. is to use SAX parsing, storage database for SQLserver2012, finally for nearly 3 G contains millions of data XML file, parse time about twenty or thirty minutes bar, efficiency can also, of course, still need to improve, not use multithreading method, after all, is a beginner, Many things are their own elusive, I hope we have a lot of guidance.
Package Xmlreadersecond;
Import java.io.IOException;
Import java.sql.Connection;
Import Java.sql.DriverManager;
Import java.sql.PreparedStatement;
Import java.sql.SQLException;
Import Java.sql.Timestamp;
Import Java.text.SimpleDateFormat;
Import java.util.ArrayList;
Import Java.util.Date;

Import java.util.List;
Import javax.xml.parsers.ParserConfigurationException;
Import Javax.xml.parsers.SAXParser;

Import Javax.xml.parsers.SAXParserFactory;
Import org.xml.sax.Attributes;
Import org.xml.sax.SAXException;
Import Org.xml.sax.XMLReader;

Import Org.xml.sax.helpers.DefaultHandler;
Import Xmlreadersecond.beanlisthandler;
Import Xmlreadersecond.dbconnect;
Import Xmlreadersecond.nodes;
Import Xmlreadersecond.way;

Import Xmlreadersecond.xmlreadersecond; public class Xmlreadersecond {* * * fuction: For parsing oversized XML files and storing * author:hz * time:2016.08.09.22:00///..... The following is a third method that solves the problem of large file reads and is highly efficient .... public void Readpolyandpointxml () throws Parserconfigurationexception, SAXException, ioexception{//Create Resolution factory SAXParserFactory factory = Saxparserfactory.newinstance (); 
Create parser SAXParser parser = Factory.newsaxparser (); 
Get reader XMLReader reader = Parser.getxmlreader (); 
Set Content Processor Beanlisthandler handler = new Beanlisthandler (); 
Reader.setcontenthandler (handler); 
Read XML document//Reader.parse ("C:/users/administrator/desktop/xmlceshi.txt"); 
Reader.parse ("C:/users/administrator/desktop/seattlexmlceshi.osm"); public static void Main (string[] args) throws Parserconfigurationexception, Saxexception, ioexception{long starttime=s

Ystem.currenttimemillis ();//Record start time date datestart =new date ();
Xmlreadersecond readersecond=new Xmlreadersecond ();

Readersecond.readpolyandpointxml ();
Dbconnect dbmysql=new dbconnect ();
Connection Connection=dbmysql.connmssql (); try{System.out.println ("warehousing is complete.) Cleaning up invalid data, please later ...
");
String strsql= ""; strSQL = "Delete from poly where ID." (select DISTINCT (ID) from poly where polytype=3) and polytype=2 ";/" set field UpdateData all to 0 to differentiate the newData PreparedStatement psstatement = Connection.preparestatement (strSQL);

Psstatement.executeupdate (); }catch (Exception ex) {ex.printstacktrace ();} long Endtime=system.currenttimemillis ();//record End time float exctime= (float) (
Endtime-starttime)/1000;
int hours= (int) (exctime/3600);
int minutes= (int) ((exctime%3600)/60);
Float seconds= (exctime%3600)%60;
SimpleDateFormat df = new SimpleDateFormat ("Yyyy-mm-dd HH:mm:ss"); SYSTEM.OUT.PRINTLN ("Done!") Begins at: "+df.format (DateStart) +" ends with: "+df.format (New Date ()) +" time consuming: "+ hours+": "+minutes+" minute: "+seconds+" seconds.

");
System.exit (0); } //。。。。。。。。。。。。。。。。 The above is the third method, which is preferable ... Class Beanlisthandler extends DefaultHandler {private string curretntag= ""; private string idcontents= ""; private
String versioncontents= "";
Private String uidioncontents= "";
Private String usercontents= "";
Private String loncontents= "";
Private String latcontents= "";
Private String changesetcontents= "";
Private String timestampcontents= "";
Private String visiblecontents= ""; PrivateString kvcontentsw= "";
Private String kvcontentsn= "";
Private String pointids= "";
Private Nodes Nodes;

Private Way Way; 
Private list<nodes> nodeslist; 

Private list<way> waylist;
Boolean type=false,temtympe=false;
Private Integer countp=0;
Private Integer countw=0; Private integer plagn=0;//is used to extract only the first two labels private integer plagw=0;//used to extract only the first two labels public void Startdocument () throws Saxexceptio 
n{nodeslist = new arraylist<nodes> (); 
Waylist = new arraylist<way> (); System.out.println ("Reading the XML document, please count to 2 seconds ...")
"); System.out.println ("Historical data is in storage, if the amount of data is too large for a period of time, please be patient ...")

"); public void Startelement (string uri, String localname, String qName, Attributes Attributes) throws Saxexception {if ( "Node". Equals (QName)) {if (Attributes.getvalue ("id")!=null&&attributes.getvalue ("id")!= "") idcontents=
Attributes.getvalue ("id");
else idcontents= "0"; if (Attributes.getvalue ("version")!=null&&attributes.getvalue ("version")!= "") versioncontents= Attributes.getvalUE ("version");
else versioncontents= "0"; if (Attributes.getvalue ("UID")!=null&&attributes.getvalue ("UID")!= "") uidioncontents=attributes.getvalue
("UID");
else uidioncontents= "0"; if (Attributes.getvalue ("User")!=null&&attributes.getvalue ("user")!= "") usercontents=attributes.getvalue
("User");
else usercontents= "0"; if (Attributes.getvalue ("Lon")!=null&&attributes.getvalue ("Lon")!= "") Loncontents=attributes.getvalue ("
Lon ");
else loncontents= "0"; if (Attributes.getvalue ("lat")!=null&&attributes.getvalue ("lat")!= "") Latcontents=attributes.getvalue ("
Lat ");
else latcontents= "0"; if (Attributes.getvalue ("Changeset")!=null&&attributes.getvalue ("changeset")!= "") changesetcontents=
Attributes.getvalue ("changeset");
else changesetcontents= "0"; if (Attributes.getvalue ("timestamp")!=null&&attributes.getvalue ("timestamp")!= "") timestampcontents=
Attributes.getvalue ("timestamp");
else timestampcontents= "0"; if (Attributes.getvalue ("visible")!=null&&atTributes.getvalue ("visible")!= "")//no longer stores visible field//Visiblecontents=attributes.getvalue ("visible");
else//visiblecontents= "0";
curretntag= "Node"; 

countp++;
} if ("Node". Equals (Curretntag) && "tag". Equals (QName) &&plagn<4) {String kcontents= "";
String vcontents= ""; 
Kcontents=attributes.getvalue ("K"); 
Vcontents=attributes.getvalue ("V");
kvcontentsn+=kcontents+ "=" +vcontents+ ";"; plagn++;//is used to extract only the first 4 tags} if ("Way". Equals (QName)) {//To way operation if (Attributes.getvalue ("id")!=null&&
Attributes.getvalue ("id")!= "") Idcontents=attributes.getvalue ("id");
else idcontents= "0"; if (Attributes.getvalue ("version")!=null&&attributes.getvalue ("version")!= "") versioncontents=
Attributes.getvalue ("version");
else versioncontents= "0"; if (Attributes.getvalue ("UID")!=null&&attributes.getvalue ("UID")!= "") uidioncontents=attributes.getvalue
("UID");
else uidioncontents= "0"; if (Attributes.getvalue ("User")!=null&&attributes.getvalue ("user")!= "") usercontents=attrIbutes.getvalue ("user");
else usercontents= "0"; if (Attributes.getvalue ("Changeset")!=null&&attributes.getvalue ("changeset")!= "") changesetcontents=
Attributes.getvalue ("changeset");
else changesetcontents= "0"; if (Attributes.getvalue ("timestamp")!=null&&attributes.getvalue ("timestamp")!= "") timestampcontents=
Attributes.getvalue ("timestamp");
else timestampcontents= "0"; if (Attributes.getvalue ("visible")!=null&&attributes.getvalue ("visible")!= "")//no longer stores visible field//
Visiblecontents=attributes.getvalue ("visible");
else//visiblecontents= "0";
Curretntag= "Way";
countw++;
} if ("Way". Equals (Curretntag) && "tag". Equals (QName) &&plagw<2) {String kcontents= "";
String vcontents= ""; 
Kcontents=attributes.getvalue ("K"); 
Vcontents=attributes.getvalue ("V");
kvcontentsw+=kcontents+ "=" +vcontents+ ";"; plagw++;//is used to extract only the first two tags//to ND operations if ("Way". Equals (Curretntag) && "nd". Equals (QName)) {String ref= ""; ref= 
Attributes.getvalue ("ref"); Pointids+=ref+ ";";}
@Override public void EndElement (string uri, String localname, String qName) throws Saxexception {//Node processing if ("N Ode ". Equals (QName)) {nodes=new nodes (); Nodes.setid (idcontents); nodes.setversion (versioncontents); Nodes.setuid (
uidioncontents);
Nodes.setuser (usercontents);
Nodes.setlon (loncontents);
Nodes.setlat (latcontents);
Nodes.setchangeset (changesetcontents);
Nodes.settimestamp (timestampcontents);
Nodes.setvisible (visiblecontents); 

Nodes.settag (KVCONTENTSN);
Nodeslist.add (nodes);
Process the waylist to be full if (nodeslist.size () >=100000) {dbconnect dbmysql=new dbconnect ();
Connection Connection=dbmysql.connmssql (); for (int i=0;i<nodeslist.size (); i++) {try {String strsql= '; strsql= ' Insert into point (Id,version,lon,lat,userid,
Username,changeset,timestamp,visible,tag,updatedata) Values (?,?,?,?,?,?,?,?,?,?, 0) "; System.out.println ("Point Id:" +nodeslist.get (i) getId () + "version:" +nodeslist.get (i). GetVersion () + "timestamp:" +nodeslist.get (i). Gettimestamp () + "tag:" + NODESLIST.GET (i). Gettag ());
PreparedStatement PS = connection.preparestatement (strSQL); 
Ps.setlong (1, Long.parselong (Nodeslist.get (i). GetId ()));
Ps.setint (2, Integer.parseint (Nodeslist.get (i). GetVersion ()));
Ps.setstring (3,nodeslist.get (i). Getlon ()); 
Ps.setstring (4,nodeslist.get (i). Getlat ()); 
Ps.setlong (5, Long.parselong (Nodeslist.get (i). Getuid ()));
Ps.setstring (6, Nodeslist.get (i). GetUser ());
Ps.setstring (7, Nodeslist.get (i). Getchangeset ());
Ps.settimestamp (8, timestamp.valueof (Nodeslist.get (i). Gettimestamp (). Replace ("T", ""). Replace ("Z", ""));
Ps.setstring (9, Nodeslist.get (i). getvisible ());
Ps.setstring (Nodeslist.get (i). Gettag ());
Ps.executeupdate ();

catch (SQLException e) {e.printstacktrace ();}
} nodeslist.clear ();
} kvcontentsn= "";
Curretntag= "";
plagn=0;
Nodes=null; ///Way processing if ("Way". Equals (QName)) {//To process the Nodeslist set not previously processed if (!nodeslist.isempty ()) {Dbconnect dbmysql=new
Dbconnect ();
Connection Connection=dbmysql.connmssql (); for (int i=0;i<nodeslist.size (); i++) {try {String strsql= "; strsql=" Insert into point (Id,version,lon,lat,userid,username,changeset,timestamp,visible,tag,
UpdateData) Values (?,?,?,?,?,?,?,?,?,?, 0) "; System.out.println ("Point Id:" +nodeslist.get (i) getId () + "version:" +nodeslist.get (i). GetVersion () + "timestamp:"
+nodeslist.get (i). Gettimestamp () + "tag:" +nodeslist.get (i)-gettag ());
PreparedStatement PS = connection.preparestatement (strSQL); 
Ps.setlong (1, Long.parselong (Nodeslist.get (i). GetId ()));
Ps.setint (2, Integer.parseint (Nodeslist.get (i). GetVersion ()));
Ps.setstring (3,nodeslist.get (i). Getlon ()); 
Ps.setstring (4,nodeslist.get (i). Getlat ()); 
Ps.setlong (5, Long.parselong (Nodeslist.get (i). Getuid ()));
Ps.setstring (6, Nodeslist.get (i). GetUser ());
Ps.setstring (7, Nodeslist.get (i). Getchangeset ());
Ps.settimestamp (8, timestamp.valueof (Nodeslist.get (i). Gettimestamp (). Replace ("T", ""). Replace ("Z", ""));
Ps.setstring (9, Nodeslist.get (i). getvisible ());
Ps.setstring (Nodeslist.get (i). Gettag ());
Ps.executeupdate (); catch (SqlexcEption e) {e.printstacktrace ();}
} nodeslist.clear ();
} way=new Way ();
Nodes=new nodes ();
Way.setid (idcontents);
Way.setversion (versioncontents);
Way.setuid (uidioncontents);
Way.setuser (usercontents);
Way.setchangeset (changesetcontents);
Way.settimestamp (timestampcontents);
Way.settag (KVCONTENTSW);
Way.setpointids (Pointids); 

Way.setvisible (visiblecontents);

Waylist.add (way);
Process the waylist to be full if (waylist.size () >=100000) {dbconnect dbmysql=new dbconnect ();
Connection Connection=dbmysql.connmssql ();
for (int i=0;i<waylist.size (); i++) {try{String strsql= ""; if (Polygonorpolyline (Waylist.get (). Getpointids ())) 
Type=true;//polygon;
else Type=false; if (type==true) {//strsql = "Insert into polygon (id,version,userid,username,changeset,timestamp,visible,tag,pointids
) Values (?,?,?,?,?,?,?,?,?) "; strSQL = "Insert into poly (id,version,userid,username,changeset,timestamp,visible,tag,pointids,polytype,updatedata
) Values (?,?,?,?,?,?,?,?,?, 3,0) "; } if (type==false) {//strsql = InsertInto Polyline (id,version,userid,username,changeset,timestamp,visible,tag,pointids) Values (?,?,?,?,?,?,?,?,?) "; strSQL = "Insert into poly (id,version,userid,username,changeset,timestamp,visible,tag,pointids,polytype,updatedata
) Values (?,?,?,?,?,?,?,?,?, 2,0) "; }//system.out.println ("Poly Id:" +waylist.get (i). GetId () + "version:" +waylist.get (i). GetVersion () + "tag:" +
Waylist.get (i). Gettag ()); 
PreparedStatement PS = connection.preparestatement (strSQL); 
Ps.setlong (1, Long.parselong (Waylist.get (i). GetId ())); 
Ps.setint (2, Integer.parseint (Waylist.get (i). GetVersion ())); 
Ps.setlong (3, Long.parselong (Waylist.get (i). Getuid ()));
Ps.setstring (4, Waylist.get (i). GetUser ());
Ps.setstring (5, Waylist.get (i). Getchangeset ());
Ps.settimestamp (6, timestamp.valueof (Waylist.get (i). Gettimestamp (). Replace ("T", ""). Replace ("Z", ""));
Ps.setstring (7, Waylist.get (i). getvisible ());
Ps.setstring (8,waylist.get (i). Gettag ());
Ps.setstring (9, Waylist.get (i). Getpointids ());
Ps.executeupdate ();
catch (Exception e) {E.printstacktrace ();
} waylist.clear ();
} kvcontentsw= "";
Curretntag= "";
Pointids= "";
plagw=0;
Way=null; ()} public void Enddocument () throws saxexception{//processing of previously unhandled waylist if (!waylist.isempty ()) {Dbconnect dbmysql=n
EW Dbconnect ();
Connection Connection=dbmysql.connmssql ();
for (int i=0;i<waylist.size (); i++) {try{String strsql= ""; if (Polygonorpolyline (Waylist.get (). Getpointids ())) 
Type=true;//polygon;
else Type=false; if (type==true) {//strsql = "Insert into polygon (id,version,userid,username,changeset,timestamp,visible,tag,pointids
) Values (?,?,?,?,?,?,?,?,?) "; strSQL = "Insert into poly (id,version,userid,username,changeset,timestamp,visible,tag,pointids,polytype,updatedata
) Values (?,?,?,?,?,?,?,?,?, 3,0) "; } if (type==false) {//strsql = "Insert into polyline (Id,version,userid,username,changeset,timestamp,visible,tag,
Pointids) Values (?,?,?,?,?,?,?,?,?) "; strSQL = "Insert into poly (id,version,userid,username,changeset,timestamp,visible,tag,pointids,polytype,updatedatA) Values (?,?,?,?,?,?,?,?,?, 2,0) "; }//system.out.println ("Poly Id:" +waylist.get (i). GetId () + "version:" +waylist.get (i). GetVersion () + "tag:" +
Waylist.get (i). Gettag ()); 
PreparedStatement PS = connection.preparestatement (strSQL); 
Ps.setlong (1, Long.parselong (Waylist.get (i). GetId ())); 
Ps.setint (2, Integer.parseint (Waylist.get (i). GetVersion ())); 
Ps.setlong (3, Long.parselong (Waylist.get (i). Getuid ()));
Ps.setstring (4, Waylist.get (i). GetUser ());
Ps.setstring (5, Waylist.get (i). Getchangeset ());
Ps.settimestamp (6, timestamp.valueof (Waylist.get (i). Gettimestamp (). Replace ("T", ""). Replace ("Z", ""));
Ps.setstring (7, Waylist.get (i). getvisible ());
Ps.setstring (8,waylist.get (i). Gettag ());
Ps.setstring (9, Waylist.get (i). Getpointids ());
Ps.executeupdate (); 
catch (Exception e) {e.printstacktrace ();
} waylist.clear (); } Private Boolean Polygonorpolyline (String nodes) {//used to differentiate line and surface data//if return True, the poly is a polygon. If (Nodes.len
Gth () <1) return false;
string[] ss = Nodes.split (";");if (ss.length<4) return false;
if (Ss[0].equals (Ss[ss.length-1])) return true;
else return false; 
The class nodes{//corresponds to node in XML, the point private String ID in the database; 
Private String version;
Private String uid; 
Private String user; 
Private String Lon; 
Private String lat; 
Private String Changeset; 
Private String timestamp; 
private String visible;

Private String tag; public string Gettag () {return tag.} public void Settag (string tag) {This.tag = tag,} public string GetId () {return I
D public void SetId (string id) {this.id = ID;} public String getversion () {return version;} public void Setversion (STR ing version {this.version = version; public String Getuid () {return UID;} public void SetUid (String uid) {This.uid
= UID; 
The public string GetUser () {return user} is public void SetUser (string user) {This.user = user}, public string Getlon () {
return lon; The public void Setlon (string lon) {This.lon = lon} is public string Getlat () {return lat;} public void Setlat (StrinG lat) {This.lat = lat;} public String Getchangeset () {return changeset.} public void Setchangeset (String changeset) {
This.changeset = changeset; Public String Gettimestamp () {return timestamp.} public void Settimestamp (String timestamp) {This.timestamp = Timesta
mp 
Public String getvisible () {return visible.} public void setvisible (String visible) {this.visible = visible;}} 
Class way{//corresponds to the Way in the XML, the Polylin in the database, and the polygon private String ID; 
Private String version;
Private String uid; 
Private String user; 
Private String Changeset; 
Private String timestamp;
Private String tag;
Private String point;
Private String Pointids; 

private String visible; Public String getvisible () {return visible.} public void setvisible (String visible) {this.visible = visible;} public S Tring Getpointids () {return pointids.} public void Setpointids (String pointids) {this.pointids = pointids;} public Str ing GetPoint () {return point;} public void SetPoint (String point) {This.point = point;  public string Gettag () {return tag.} public void Settag (string tag) {This.tag = tag,} public string GetId () {return
Id public void SetId (string id) {this.id = ID;} public String getversion () {return version;} public void Setversion (STR ing version {this.version = version; public String Getuid () {return UID;} public void SetUid (String uid) {This.uid
= UID; public string GetUser () {return User:} public void SetUser (string user) {This.user = user}, public string GetChanges ET () {return changeset.} public void Setchangeset (String changeset) {this.changeset = changeset;} public String Gettim

Estamp () {return timestamp.} public void Settimestamp (String timestamp) {this.timestamp = timestamp;} Class Dbconnect {//Connection SQL Server database public String dbname = ' ceshixml '; public Connection connmssql () {String Driverna 
me = "Com.microsoft.sqlserver.jdbc.SQLServerDriver"; String Dburl = "jdbc:sqlserver://localhost:1433;" 
Databasename= "+this.dbname; String UserName = "sa"; Default User name String userpwd = "123456";
Password Connection dbconn = null; 
try {class.forname (drivername); dbconn = Drivermanager.getconnection (Dburl, UserName, userpwd);
catch (Exception e) {e.printstacktrace ();} return dbconn; } 

}


Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.