Reading Large Excel Files in Java

Source: Internet
Author: User

My work required reading a large amount of data from Excel files. Reading them with Apache POI's normal user model threw an out-of-memory error. The documentation shows that there is another mode, the event user model (XSSF with the SAX event API). Instead of loading the entire file into memory at once, this mode reads the sheet row by row, so the out-of-memory error can be avoided.

Here is the code:

package com.ism.excel.pkg07;

import java.io.InputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Streaming reader for .xlsx workbooks built on the POI XSSF event API and SAX.
 *
 * <p>The sheet XML is parsed element by element instead of being loaded as a
 * whole; every completed row is handed to {@link #optRows(int, int, List)}.
 * An instance keeps per-parse state in its fields, so one instance must not be
 * used for two parses at the same time.
 */
public abstract class XxlsAbstract extends DefaultHandler {

    /** Shared strings table of the workbook currently being parsed. */
    private SharedStringsTable sst;
    /** Character data collected since the most recent start tag. */
    private String lastContents = "";
    /** True when the current cell's {@code <v>} holds a shared-string index. */
    private boolean nextIsString;

    /** Zero-based count of sheets parsed by this instance; -1 before the first. */
    private int sheetIndex = -1;
    /** Values of the row being assembled; cleared after every row callback. */
    private List<String> rowlist = new ArrayList<String>();

    /** Current row index within the sheet, counted from 0 by closing row tags. */
    private int curRow = 0;
    /** One-based column index of the current cell. */
    private int curCol = 0;
    /** One-based column index of the previous cell that produced a value. */
    private int preCol = 0;
    /** Index of the header row, typically 0. */
    private int titleRow = 0;
    /** Number of columns recorded from the header row. */
    private int rowsize = 0;

    /**
     * Callback invoked once for every row that has been read.
     *
     * @param sheetIndex zero-based count of the sheet being parsed by this instance
     * @param curRow     zero-based index of the row within the sheet
     * @param rowlist    cell values of the row as strings; the list is cleared and
     *                   reused right after this call returns, so copy it if the
     *                   values must be retained
     * @throws SQLException if the implementation fails to store the row; the
     *                      parse is then aborted with a {@link SAXException}
     *                      that carries this exception as its cause
     */
    public abstract void optRows(int sheetIndex, int curRow, List<String> rowlist) throws SQLException;

    /**
     * Parses a single sheet of the workbook.
     *
     * @param filename path of the .xlsx file
     * @param sheetId  index of the sheet to traverse, starting from 1; it is
     *                 looked up through the relationship id {@code "rId" + sheetId}
     * @throws Exception if the file cannot be opened or parsed, or a row callback fails
     */
    public void processOneSheet(String filename, int sheetId) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        try {
            XSSFReader r = new XSSFReader(pkg);
            XMLReader parser = fetchSheetParser(r.getSharedStringsTable());
            InputStream sheet = r.getSheet("rId" + sheetId);
            sheetIndex++;
            parseSheet(parser, sheet);
        } finally {
            // revert() releases the file without writing the package back to disk.
            pkg.revert();
        }
    }

    /**
     * Parses every sheet of the workbook in the order returned by the reader.
     *
     * @param filename path of the .xlsx file
     * @throws Exception if the file cannot be opened or parsed, or a row callback fails
     */
    public void process(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        try {
            XSSFReader r = new XSSFReader(pkg);
            XMLReader parser = fetchSheetParser(r.getSharedStringsTable());
            Iterator<InputStream> sheets = r.getSheetsData();
            while (sheets.hasNext()) {
                sheetIndex++;
                parseSheet(parser, sheets.next());
            }
        } finally {
            // revert() releases the file without writing the package back to disk.
            pkg.revert();
        }
    }

    /** Resets the per-sheet state, parses one sheet stream and always closes it. */
    private void parseSheet(XMLReader parser, InputStream sheet) throws Exception {
        curRow = 0;
        curCol = 0;
        preCol = 0;
        rowlist.clear();
        try {
            parser.parse(new InputSource(sheet));
        } finally {
            sheet.close();
        }
    }

    /**
     * Creates a SAX reader that reports its events to this handler.
     *
     * @param sst shared strings table used to resolve string cells
     * @return the configured reader
     * @throws SAXException if no XML reader can be created
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        // NOTE(review): the reader is created with default settings; consider
        // disabling DTDs/external entities if workbooks come from untrusted sources.
        XMLReader parser = XMLReaderFactory.createXMLReader();
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes)
            throws SAXException {
        // c == cell
        if ("c".equals(name)) {
            String cellType = attributes.getValue("t");
            String cellRef = attributes.getValue("r");
            // A cell without a reference is placed right after the previous cell.
            curCol = (cellRef != null) ? getRowIndex(cellRef) : curCol + 1;
            // t="s" means the <v> content is an index into the shared strings table.
            nextIsString = "s".equals(cellType);
        }
        // Start collecting character data afresh for every element.
        lastContents = "";
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        if ("v".equals(name)) {
            // v == value of the cell; characters() may have been called several
            // times, so the content is only complete here.
            if (nextIsString) {
                lastContents = resolveSharedString(lastContents);
            }
            String value = lastContents.trim();
            if (value.equals("")) {
                value = " ";
            }
            // Fill the columns skipped between the previous value and this one.
            int gap = curCol - preCol;
            for (int i = 0; i < gap - 1; i++) {
                rowlist.add(preCol, " ");
            }
            preCol = curCol;
            rowlist.add(curCol - 1, value);
        } else if ("row".equals(name)) {
            finishRow();
        }
    }

    /** Returns the shared string for the given index text, or the text itself if it cannot be resolved. */
    private String resolveSharedString(String rawIndex) {
        try {
            int idx = Integer.parseInt(rawIndex);
            return new XSSFRichTextString(sst.getEntryAt(idx)).toString();
        } catch (RuntimeException ignored) {
            // Not a usable index (non-numeric or out of range): keep the raw
            // cell text rather than aborting the whole parse.
            return rawIndex;
        }
    }

    /** Pads the finished row to the header width, hands it to optRows and resets the row state. */
    private void finishRow() throws SAXException {
        int cells = rowlist.size();
        if (curRow > titleRow && cells < rowsize) {
            for (int i = 0; i < rowsize - cells; i++) {
                rowlist.add("");
            }
        }
        try {
            optRows(sheetIndex, curRow, rowlist);
        } catch (SQLException e) {
            throw new SAXException(
                    "optRows failed for sheet " + sheetIndex + ", row " + curRow, e);
        }
        if (curRow == titleRow) {
            rowsize = rowlist.size();
        }
        rowlist.clear();
        curRow++;
        curCol = 0;
        preCol = 0;
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // May be called more than once per element, so append.
        lastContents += new String(ch, start, length);
    }

    /**
     * Converts a cell reference to its one-based column index.
     *
     * <p>Only the upper-case letters of the reference are used; they are read as
     * a base-26 number with A=1 ... Z=26. For example {@code "AB45"} gives
     * 1*26 + 2 = 28. Despite the method name, the result is a column index.
     *
     * @param rowStr cell reference such as {@code "AB45"}; must not be null
     * @return the one-based column index, or 0 if the reference has no upper-case letters
     */
    public int getRowIndex(String rowStr) {
        int column = 0;
        for (int i = 0; i < rowStr.length(); i++) {
            char c = rowStr.charAt(i);
            if (c >= 'A' && c <= 'Z') {
                column = column * 26 + (c - 'A' + 1);
            }
        }
        return column;
    }

    /** @return the index of the header row */
    public int getTitleRow() {
        return titleRow;
    }

    /** @param titleRow the index of the header row */
    public void setTitleRow(int titleRow) {
        this.titleRow = titleRow;
    }
}

Reading Large Excel Files in Java

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.