Before parsing a CSV file, let's look at what a CSV file is and how it is formatted.
A CSV (Comma-Separated Values) file is a plain-text file whose fields are separated by commas, so it can be opened directly in any text editor. On Windows it is opened in Excel by default. Its format rules are as follows (when in doubt, it is best to check how Excel parses the file):
① Each record occupies one row;
② Use commas as the separator;
③ Spaces before and after Commas are ignored;
④ A field that contains a comma must be enclosed in double quotation marks;
⑤ A field that contains a line break must be enclosed in double quotation marks;
⑥ A field with leading or trailing spaces must be enclosed in double quotation marks;
⑦ A double quotation mark inside a field is represented by two double quotation marks;
⑧ A field that contains double quotation marks must itself be enclosed in double quotation marks;
⑨ The first record may be a header row of field names;
The comma and double quotation marks mentioned above are both halfwidth characters.
The following uses regular expressions and java to parse csv files.
First, here is a regular expression that matches the smallest unit of data in a CSV file (for example, if 1,2,3 is one row of data in the CSV file, then 1 is one smallest unit of data):
"([^",\n]*[,\n])*([^",\n]*"{2})*[^",\n]*"[ ]*,[ ]*|[^",\n]*[ ]*,[ ]*|"([^",\n]*[,\n])*([^",\n]*"{2})*[^",\n]*"[ ]*|[^",\n]*[ ]*
The following is the java code for parsing the file:
package myutil;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for reading and writing CSV (comma-separated values) files.
 *
 * <p>Format handled by this class:
 * <ul>
 *   <li>fields are separated by commas; whitespace around an unquoted field is trimmed;</li>
 *   <li>a field may be enclosed in double quotation marks, in which case it may contain
 *       commas, line breaks and leading/trailing spaces;</li>
 *   <li>inside a quoted field a double quotation mark is written as two double quotation
 *       marks.</li>
 * </ul>
 *
 * <p>Files are read and written with {@link FileReader}/{@link FileWriter}, i.e. with the
 * JVM's default charset.
 *
 * @author panhf2003
 * @version 2008/09/05
 */
public class CsvFileUtil {

    /** File read by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_TEST_FILE = "e:\\test1.csv";

    /**
     * A field made of plain text and balanced {@code "..."} sections. Commas are only allowed
     * inside the quoted sections. Possessive quantifiers keep matching linear (no backtracking).
     */
    private static final String QUOTE_AWARE_FIELD = "[^\",]*+(?:\"[^\"]*+\"[^\",]*+)*+";

    /** Fallback for malformed fields (unbalanced quotes): everything up to the next comma. */
    private static final String LENIENT_FIELD = "[^,]*+";

    /**
     * Matches the smallest unit of data in a record: one field (group 1) followed by its
     * terminator (group 2), which is either a comma or the end of the record. {@code \G}
     * anchors each match at the end of the previous one so no text is skipped. Compiled once
     * because the expression is the same for every record.
     */
    private static final Pattern FIELD_PATTERN =
            Pattern.compile("\\G(" + QUOTE_AWARE_FIELD + "|" + LENIENT_FIELD + ")(,|\\z)");

    /**
     * Private constructor: this is a static utility class and is never instantiated.
     */
    private CsvFileUtil() {
    }

    /**
     * Small manual test: reads a CSV file and prints every record on its own line, fields
     * separated by a space. I/O failures are logged at {@code SEVERE} level.
     *
     * @param args optional; {@code args[0]} is the path of the file to read. Without it the
     *             built-in default test path is used.
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_TEST_FILE;
        try {
            List<String[]> records = readCsvFile(path);
            for (String[] record : records) {
                for (String field : record) {
                    System.out.print(field + " ");
                }
                System.out.println();
            }
        } catch (IOException ex) {
            // FileNotFoundException is an IOException, so one handler covers both.
            Logger.getLogger(CsvFileUtil.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Reads a CSV file and returns its records.
     *
     * <p>Each record becomes one {@code String[]} with one element per field, in file order.
     * Empty fields are kept as empty strings so that column positions are preserved. A line
     * that is empty or contains only whitespace yields an array of length zero. A quoted field
     * may span several physical lines; the line breaks inside it are returned as
     * {@code "\n"}.
     *
     * @param argPath path of the CSV file
     * @return the records of the file (never {@code null})
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws IOException if reading or closing the file fails
     */
    public static List<String[]> readCsvFile(String argPath) throws FileNotFoundException, IOException {
        List<String[]> records = new ArrayList<String[]>();
        // Closing the BufferedReader also closes the wrapped FileReader.
        BufferedReader reader = new BufferedReader(new FileReader(argPath));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                StringBuilder record = new StringBuilder(line);
                // An odd number of quotation marks means a quoted field is still open, so the
                // record continues on the next physical line.
                boolean quoteOpen = hasOddQuoteCount(line);
                while (quoteOpen) {
                    String continuation = reader.readLine();
                    if (continuation == null) {
                        break; // end of file inside a quoted field: parse what we have
                    }
                    record.append('\n').append(continuation);
                    quoteOpen = !hasOddQuoteCount(continuation);
                }
                records.add(parseRecord(record.toString()));
            }
        } finally {
            reader.close();
        }
        return records;
    }

    /**
     * Writes records to the file {@code argFileName} inside the directory {@code argPath}.
     * Missing parent directories are created. Fields are separated by commas and each record
     * is terminated with the platform line separator. A field is enclosed in double quotation
     * marks when it contains a double quotation mark (which is doubled), a comma, a line
     * break, a space, or leading/trailing whitespace. A {@code null} field is written as an
     * empty field. The arrays in {@code argList} are not modified.
     *
     * @param argList the data to write; every element must be a {@code String[]}
     * @param argPath directory of the CSV file, with or without a trailing separator;
     *                {@code null} or empty means {@code argFileName} is used as given
     * @param argFileName name of the CSV file
     * @param isNewFile {@code true} to overwrite an existing file, {@code false} to append to it
     * @throws IOException if the file cannot be opened, written or closed
     * @throws Exception declared for source compatibility with existing callers; invalid
     *                   arguments are reported as {@link IllegalArgumentException}
     */
    public static void writeCsvFile(List<?> argList, String argPath, String argFileName, boolean isNewFile)
            throws IOException, Exception {
        // Data check
        if (argList == null || argList.size() == 0) {
            throw new IllegalArgumentException("no data");
        }
        for (Object row : argList) {
            if (!(row instanceof String[])) {
                throw new IllegalArgumentException("incorrect data format");
            }
        }

        File file = new File(buildFullFileName(argPath, argFileName));
        // Check the file path; a bare file name has no parent directory.
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }

        // Second FileWriter argument is "append": the opposite of "overwrite".
        BufferedWriter writer = new BufferedWriter(new FileWriter(file, !isNewFile));
        try {
            for (Object row : argList) {
                String[] fields = (String[]) row;
                for (int j = 0; j < fields.length; j++) {
                    if (j > 0) {
                        writer.write(",");
                    }
                    writer.write(encodeField(fields[j]));
                }
                writer.newLine();
            }
        } finally {
            // Closing flushes the buffer and closes the wrapped FileWriter.
            writer.close();
        }
    }

    /**
     * Splits one record into its fields using {@link #FIELD_PATTERN}.
     */
    private static String[] parseRecord(String record) {
        if (record.trim().length() == 0) {
            return new String[0];
        }
        List<String> fields = new ArrayList<String>();
        Matcher matcher = FIELD_PATTERN.matcher(record);
        while (matcher.find()) {
            fields.add(decodeField(matcher.group(1)));
            if (matcher.group(2).length() == 0) {
                // Terminator was the end of the record, not a comma. Stop here, otherwise
                // the pattern would match one more (spurious) empty field at the end.
                break;
            }
        }
        return fields.toArray(new String[fields.size()]);
    }

    /**
     * Converts the raw text of one field into its value: trims surrounding whitespace and,
     * if the field is enclosed in double quotation marks, removes them and turns doubled
     * quotation marks back into single ones.
     */
    private static String decodeField(String rawField) {
        String value = rawField.trim();
        int length = value.length();
        // length >= 2 guards against a field that is a single quotation mark.
        if (length >= 2 && value.charAt(0) == '"' && value.charAt(length - 1) == '"') {
            return value.substring(1, length - 1).replace("\"\"", "\"");
        }
        return value;
    }

    /**
     * Returns the text to write for one field, quoted and escaped when necessary.
     */
    private static String encodeField(String field) {
        if (field == null) {
            return "";
        }
        boolean hasQuote = field.indexOf('"') >= 0;
        boolean needsQuotes = hasQuote
                || field.indexOf(',') >= 0
                || field.indexOf('\n') >= 0
                || field.indexOf('\r') >= 0
                || field.indexOf(' ') >= 0
                // NOTE(review): the original literal here was mis-encoded; presumably the
                // full-width (ideographic) space. Quoting it is harmless either way.
                || field.indexOf('\u3000') >= 0
                // The reader trims unquoted fields, so protect any other edge whitespace.
                || !field.equals(field.trim());
        if (!needsQuotes) {
            return field;
        }
        String escaped = hasQuote ? field.replace("\"", "\"\"") : field;
        return "\"" + escaped + "\"";
    }

    /**
     * Joins directory and file name with the platform separator, unless the directory is
     * empty or already ends with a separator.
     */
    private static String buildFullFileName(String directory, String fileName) {
        if (directory == null || directory.length() == 0) {
            return fileName;
        }
        if (directory.endsWith(File.separator) || directory.endsWith("/")) {
            return directory + fileName;
        }
        return directory + File.separator + fileName;
    }

    /**
     * Tells whether the text contains an odd number of double quotation marks.
     */
    private static boolean hasOddQuoteCount(String text) {
        boolean odd = false;
        for (int i = 0; i < text.length(); i++) {
            if (text.charAt(i) == '"') {
                odd = !odd;
            }
        }
        return odd;
    }
}
This article is from the CSDN blog; if you reproduce it, please credit the source: http://blog.csdn.net/panhf2003/archive/2008/09/16/2937853.aspx