Using Apache POI to convert Excel files (.xls and .xlsx) into txt/CSV text files
First, add the required JAR dependencies:
<dependencies>
  <dependency>
    <groupId>net.sf.opencsv</groupId>
    <artifactId>opencsv</artifactId>
    <version>2.1</version>
  </dependency>
  <dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.1</version>
    <type>pom</type>
  </dependency>
  <dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.7</version>
  </dependency>
  <dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.1</version>
  </dependency>
  <dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.7</version>
  </dependency>
  <dependency>
    <groupId>dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>1.6.1</version>
  </dependency>
</dependencies>
Full code:
Import JXL. Cell;import JXL. Sheet;import JXL. Workbook;import JXL. Workbooksettings;import java.io.*;import java.util.locale;/** * @ClassName: Exceltocsv * @Description: Convert Excel into CSV format * @ Author:mashiwei */public class Exceltocsv {/** * convert Excel (XLS/XLSX) to CSV file * @param excelfile * @param csvfile * @return String */public static string Getcsv (String excelfile, String csvfile) {//.xlsx file suffix converted to CSV if (Excelfile.endswith (". xlsx")) {Xlsx2csv.trans (excelfile,csvfile); return csvfile; The//.xls file suffix is converted to csv else {try {outputstream OS = new FileOutputStream (The new file (CSV File)); OutputStreamWriter OSW = new OutputStreamWriter (OS, "UTF8"); BufferedWriter bw = new BufferedWriter (OSW); Load Excel file Workbooksettings ws = new workbooksettings (); Ws.setlocale (New Locale ("en", "en")); Workbook wk = Workbook.getworkbooK (New File (excelfile), WS); Obtain each page (sheets) for (int sheet = 0; sheet < wk.getnumberofsheets (); sheet++) from the workbook (workbook) { Sheet s = wk.getsheet (Sheet); cell[] row = null; From each page (sheet) Get each chunk (Cell) for (int i = 0; i < s.getrows (); i++) {row = S.get Row (i); if (Row.length > 0) {bw.write (row[0].getcontents ()); for (int j = 1; j < Row.length; J + +) {//write delimiter bw.write (' ,‘); Bw.write (Row[j].getcontents ()); }} bw.newline (); }} bw.flush (); Bw.close (); } catch (Exception e) {System.err.println (e.tostring ()); E.prIntstacktrace (); } return csvfile; } }}
package com.ys.penspark.steps.excelinput;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;

/**
 * Streams the sheets of an .xlsx package through a SAX handler and prints their cells
 * as comma-separated text.
 *
 * <p>Cell values are printed without quoting or escaping, and all sheets are written
 * to the same output one after another.
 */
public class Xlsx2csv {

    /**
     * The type of the data value is indicated by an attribute on the cell.
     * The value is usually in a "v" element within the cell.
     */
    enum XssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    /**
     * SAX handler that turns the cell elements of one sheet into CSV output.
     *
     * <p>Declared static: it only needs the values passed to its constructor, so it
     * does not have to hold a reference to the enclosing converter.
     */
    static class MyXSSFSheetHandler extends DefaultHandler {

        /** Table with styles. */
        private final StylesTable stylesTable;

        /** Table with unique strings. */
        private final ReadOnlySharedStringsTable sharedStringsTable;

        /** Destination for data. */
        private final PrintStream output;

        /** Number of columns to read starting with leftmost. */
        private final int minColumnCount;

        /** Set when a value start element is seen. */
        private boolean vIsOpen;

        /** Set when cell start element is seen; used when cell close element is seen. */
        private XssfDataType nextDataType;

        // Used to format numeric cell values.
        private short formatIndex;
        private String formatString;
        private final DataFormatter formatter;

        private int thisColumn = -1;

        /** The last column printed to the output stream. */
        private int lastColumnNumber = -1;

        /** Gathers characters as they are seen. */
        private final StringBuffer value;

        /**
         * @param styles  table with styles
         * @param strings table with shared strings
         * @param cols    minimum number of columns to show
         * @param target  sink for output
         */
        public MyXSSFSheetHandler(StylesTable styles, ReadOnlySharedStringsTable strings,
                                  int cols, PrintStream target) {
            this.stylesTable = styles;
            this.sharedStringsTable = strings;
            this.minColumnCount = cols;
            this.output = target;
            this.value = new StringBuffer();
            this.nextDataType = XssfDataType.NUMBER;
            this.formatter = new DataFormatter();
        }

        /**
         * Records the type, style and column of each cell ("c") element and resets the
         * text buffer when a value ("v" / "inlineStr") element opens.
         */
        @Override
        public void startElement(String uri, String localName, String name,
                                 Attributes attributes) throws SAXException {
            if ("inlineStr".equals(name) || "v".equals(name)) {
                vIsOpen = true;
                // Clear contents cache
                value.setLength(0);
            } else if ("c".equals(name)) { // c => cell
                // Get the cell reference, e.g. "AB12", and split off the column letters.
                String r = attributes.getValue("r");
                int firstDigit = -1;
                for (int c = 0; c < r.length(); ++c) {
                    if (Character.isDigit(r.charAt(c))) {
                        firstDigit = c;
                        break;
                    }
                }
                thisColumn = nameToColumn(r.substring(0, firstDigit));

                // Set up defaults.
                this.nextDataType = XssfDataType.NUMBER;
                this.formatIndex = -1;
                this.formatString = null;
                String cellType = attributes.getValue("t");
                String cellStyleStr = attributes.getValue("s");
                if ("b".equals(cellType)) {
                    nextDataType = XssfDataType.BOOL;
                } else if ("e".equals(cellType)) {
                    nextDataType = XssfDataType.ERROR;
                } else if ("inlineStr".equals(cellType)) {
                    nextDataType = XssfDataType.INLINESTR;
                } else if ("s".equals(cellType)) {
                    nextDataType = XssfDataType.SSTINDEX;
                } else if ("str".equals(cellType)) {
                    nextDataType = XssfDataType.FORMULA;
                } else if (cellStyleStr != null) {
                    // It's a number, but almost certainly one
                    // with a special style or format
                    int styleIndex = Integer.parseInt(cellStyleStr);
                    XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                    this.formatIndex = style.getDataFormat();
                    this.formatString = style.getDataFormatString();
                    if (this.formatString == null) {
                        this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
                    }
                }
            }
        }

        /**
         * Prints the buffered cell value when a "v" element closes, and terminates the
         * output line when a "row" element closes.
         */
        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            String thisStr = null;

            // v => contents of a cell
            if ("v".equals(name)) {
                // Process the value contents as required.
                // Do now, as characters() may be called more than once
                switch (nextDataType) {
                    case BOOL:
                        char first = value.charAt(0);
                        thisStr = first == '0' ? "FALSE" : "TRUE";
                        break;

                    case ERROR:
                        thisStr = "\"ERROR:" + value.toString() + '"';
                        break;

                    case FORMULA:
                        // A formula could result in a string value; it is emitted unquoted.
                        thisStr = value.toString();
                        break;

                    case INLINESTR:
                        // TODO: have not seen an example of this, so it's untested.
                        XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
                        thisStr = rtsi.toString();
                        break;

                    case SSTINDEX:
                        String sstIndex = value.toString();
                        try {
                            int idx = Integer.parseInt(sstIndex);
                            XSSFRichTextString rtss =
                                    new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
                            thisStr = rtss.toString();
                        } catch (NumberFormatException ex) {
                            output.println("Failed to parse SST index '" + sstIndex + "': "
                                    + ex.toString());
                        }
                        break;

                    case NUMBER:
                        String n = value.toString();
                        if (this.formatString != null) {
                            thisStr = formatter.formatRawCellContents(Double.parseDouble(n),
                                    this.formatIndex, this.formatString);
                        } else {
                            thisStr = n;
                        }
                        break;

                    default:
                        thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                        break;
                }

                // Output after we've seen the string contents.
                // Emit commas for any fields that were missing on this row.
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
                for (int i = lastColumnNumber; i < thisColumn; ++i) {
                    output.print(',');
                }

                // Might be the empty string.
                output.print(thisStr);

                // Update column
                if (thisColumn > -1) {
                    lastColumnNumber = thisColumn;
                }
            } else if ("row".equals(name)) {
                // Print out any missing commas if needed
                if (minColumnCount > 0) {
                    // Columns are 0 based
                    if (lastColumnNumber == -1) {
                        lastColumnNumber = 0;
                    }
                    for (int i = lastColumnNumber; i < minColumnCount; i++) {
                        output.print(',');
                    }
                }

                // We're onto a new row
                output.println();
                lastColumnNumber = -1;
            }
        }

        /**
         * Captures characters only if a suitable element is open.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (vIsOpen) {
                value.append(ch, start, length);
            }
        }

        /**
         * Converts an Excel column name like "C" to a zero-based index
         * ("A" is 0, "Z" is 25, "AA" is 26).
         *
         * @param name column letters, upper case
         * @return index corresponding to the specified name
         */
        private int nameToColumn(String name) {
            int column = -1;
            for (int i = 0; i < name.length(); ++i) {
                int c = name.charAt(i);
                column = (column + 1) * 26 + c - 'A';
            }
            return column;
        }
    }

    ///////////////////////////////////////

    /** Character set used when this converter opens the output file itself. */
    private static final String OUTPUT_CHARSET = "GBK";

    private final OPCPackage xlsxPackage;
    private final int minColumns;
    private final PrintStream output;

    /**
     * Creates a new XLSX -> CSV converter on resources owned by the caller.
     *
     * @param pkg        the XLSX package to process
     * @param output     the PrintStream to output the CSV to
     * @param minColumns the minimum number of columns to output, or -1 for no minimum
     */
    public Xlsx2csv(OPCPackage pkg, PrintStream output, int minColumns) {
        this.xlsxPackage = pkg;
        this.output = output;
        this.minColumns = minColumns;
    }

    /**
     * Creates a converter that opens the input package read-only and writes to a new
     * file encoded as {@value #OUTPUT_CHARSET}, with no minimum column count.
     *
     * @param inputFilePath  path of the .xlsx file to read
     * @param outputFilePath path of the CSV file to create
     * @throws Exception if the package or the output file cannot be opened
     */
    public Xlsx2csv(String inputFilePath, String outputFilePath) throws Exception {
        OPCPackage pkg = OPCPackage.open(inputFilePath, PackageAccess.READ);
        PrintStream target;
        try {
            target = new PrintStream(outputFilePath, OUTPUT_CHARSET);
        } catch (Exception e) {
            // Do not leave the package open when the output cannot be created.
            pkg.revert();
            throw e;
        }
        this.xlsxPackage = pkg;
        this.output = target;
        this.minColumns = -1;
    }

    /**
     * Parses and shows the content of one sheet using the specified styles and
     * shared-strings tables.
     *
     * @param styles           styles table of the workbook
     * @param strings          shared-strings table of the workbook
     * @param sheetInputStream raw XML stream of the sheet
     */
    public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings,
                             InputStream sheetInputStream)
            throws IOException, ParserConfigurationException, SAXException {
        InputSource sheetSource = new InputSource(sheetInputStream);
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader sheetParser = saxParser.getXMLReader();
        ContentHandler handler =
                new MyXSSFSheetHandler(styles, strings, this.minColumns, this.output);
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
    }

    /**
     * Initiates the processing of the XLSX file to CSV: every sheet of the package is
     * converted in turn and appended to the output.
     */
    public void process()
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            try {
                processSheet(styles, strings, stream);
            } finally {
                // Close the sheet stream even when parsing fails.
                stream.close();
            }
        }
        this.output.flush();
    }

    /**
     * Converts the .xlsx file {@code fileInput} into the CSV file {@code fileOutput}.
     *
     * <p>Failures are printed to standard error and do not propagate. The output file
     * and the package opened for the conversion are always released.
     *
     * @param fileInput  path of the .xlsx file to read
     * @param fileOutput path of the CSV file to write
     */
    public static void trans(String fileInput, String fileOutput) {
        Xlsx2csv xlsx2csv = null;
        try {
            // Construct and process in one try block: a failed constructor must not be
            // followed by a call on a null reference.
            xlsx2csv = new Xlsx2csv(fileInput, fileOutput);
            xlsx2csv.process();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (xlsx2csv != null) {
                // Closing the stream flushes buffered CSV text to disk.
                xlsx2csv.output.close();
                // The package was opened read-only, so discard it rather than save it.
                xlsx2csv.xlsxPackage.revert();
            }
        }
    }
}
Usage:
Exceltocsv.getcsv("input", "output");
This is one way to convert Excel .xls and .xlsx files into CSV files using Java (xls2csv, xlsx2csv).