A Java program for quickly splitting large files using multithreading

Source: Internet
Author: User
Tags file separator getmessage readline log4j

Some time ago I needed to import a large amount of data. The DBA provided CSV files, but each one was several GB in size; loading them directly made the database very slow and caused it to run low on memory. To solve this, I wrote a program that quickly splits a large file into smaller parts.

import java.io.*;
import java.util.*;
import java.util.concurrent.*;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

  public class Filesplitutil {private final static Logger log = Logmanager.getlogger (Filesplitutil.class); Private static final Long originfilesize = 1024 * 1024 * 100;//100M private static final int blockfilesize = 1024 * 102
  4 * 64;//to prevent Chinese garbled, must take 2 of the n-th/** * CVS File separator * * * private static final char cvsseparator = ' ^ ';
    public static void Main (String args[]) {Long start = System.currenttimemillis ();
      try {String fileName = ' d:\\csvtest\\aa.csv ';
      File SourceFile = new file (fileName);
        if (Sourcefile.length () >= originfilesize) {String cvsfilename = Filename.replaceall ("\\\\", "/");
        Filesplitutil filesplitutil = new Filesplitutil ();
        List<string> parts=filesplitutil.splitbysize (Cvsfilename, blockfilesize); for (String part:parts) {System.out.println ("PartName IS: "+part"); 
    SYSTEM.OUT.PRINTLN ("Total file Length" +sourcefile.length () + ", split file time consuming:" + (System.currenttimemillis ()-start) + "Ms.");
    }catch (Exception e) {log.info (E.getstacktrace ());
   /** * Split File * * @param filename the full filename to be split * @param bytesize number of bytes split * @return Split file name list * * Public list<string> splitbysize (String fileName, int bytesize) throws IOException, Interruptedexception
    {list<string> parts = new arraylist<string> ();
    File File = new file (fileName);
    int count = (int) Math.ceil (File.length ()/(double) bytesize);
    int Countlen = (count + ""). Length ();
    Randomaccessfile RAF = new Randomaccessfile (FileName, "R");
    Long Totallen = Raf.length ();
 
    Countdownlatch latch = new Countdownlatch (count);
          for (int i = 0; i < count; i++) {String partfilename = File.getpath () + "."
      + Leftpad ((i + 1) + "", Countlen, ' 0 ') + ". CVs";
      int readsize=bytesize; LOng startpos= (Long) i * bytesize;
      Long nextpos= (Long) (i+1) * bytesize;
      if (Nextpos>totallen) {readsize= (int) (TOTALLEN-STARTPOS);
      New Splitrunnable (ReadSize, startpos, partfilename, file, latch). Run ();
    Parts.add (Partfilename);
    Latch.await ()//Waiting for all files to be written///due to cutting may cause the line to be cut off, processing all the split files, merging the rows mergerow (parts);
  return parts; /** * Split Processing Runnable * * @author Supeidong/Private class Splitrunnable implements Runnable {int
    ByteSize;
    String Partfilename;
    File Originfile;
    Long startpos;
    Countdownlatch latch; Public splitrunnable (int bytesize, long startpos, String partfilename, File originfile, Countdownlatch LATC
      h) {this.startpos = startpos;
      This.bytesize = ByteSize;
      This.partfilename = Partfilename;
      This.originfile = Originfile;
    This.latch = latch;
      public void Run () {Randomaccessfile rfile;
      OutputStream OS;
  try {      Rfile = new Randomaccessfile (Originfile, "R");
        Byte[] B = new Byte[bytesize];
        Rfile.seek (startpos)//move the pointer to the beginning of each "segment" int s = rfile.read (b);
        OS = new FileOutputStream (partfilename);
        Os.write (b, 0, s);
        Os.flush ();
        Os.close ();
      Latch.countdown ();
        catch (IOException e) {log.error (E.getmessage ());
      Latch.countdown (); /** * Merge Cut line * * @param parts * * private void Mergerow (list<string> parts) {List
    <PartFile> partfiles = new arraylist<partfile> ();
        try {//Assemble the Shard Table object for (int i=0;i<parts.size (); i++) {String partfilename=parts.get (i);
        File Splitfiletemp = new file (partfilename);
          if (splitfiletemp.exists ()) {Partfile partfile = new Partfile ();
          BufferedReader reader=new BufferedReader (New InputStreamReader (New FileInputStream (splitfiletemp), "GBK")); String firstrow = Reader.readLine ();
          String Secondrow = Reader.readline ();
          String Endrow = Readlastline (partfilename);
          Partfile.setpartfilename (Partfilename);
          Partfile.setfirstrow (FirstRow);
          Partfile.setendrow (Endrow);
            if (i>=1) {String prepartfile=parts.get (i-1);
            String Preendrow = Readlastline (prepartfile);
          Partfile.setfirstisfull (GetCharCount (Firstrow+preendrow) >getcharcount (Secondrow));
          } partfiles.add (Partfile);
        Reader.close (); The Write for (int i = 0; i < partfiles.size ()-1; i++) {Partfile partfile = Partfi
        Les.get (i);
        Partfile Partfilenext = partfiles.get (i + 1);
        StringBuilder sb = new StringBuilder ();
          if (Partfilenext.getfirstisfull ()) {sb.append ("\ r \ n");
        Sb.append (Partfilenext.getfirstrow ());
        else {sb.append (Partfilenext.getfirstrow ()); } WritelAstline (Partfile.getpartfilename (), sb.tostring ());
    The catch (Exception e) {log.error (E.getmessage ());
    }/** * Gets the number of occurrences of a character * @param s * @return/private int getcharcount (String s) {int count = 0;
      for (int i = 0; i < s.length (); i++) {if (S.charat (i) = = Cvsseparator) {count++;
  } return count; /** * Read the number of file rows bufferedinputstream * * @param filename * @return/public int getfilerow (String f
    Ilename) throws IOException {InputStream is = new Bufferedinputstream (new FileInputStream (filename));
    Byte[] C = new byte[1024];
    int count = 0;
    int readChars = 0;
          while ((ReadChars = Is.read (c))!=-1) {for (int i = 0; i < ReadChars; ++i) {if (c[i] = = ' \ n ')
      ++count;
    } is.close ();
  return count; /** * Read the last line of data * @param filename * @return * @throws ioexception/private String Readlastline (Str ing FIlename) throws IOException {//Use Randomaccessfile to find the last row of data from behind Randomaccessfile = new RAF (Randomaccessfile
    Me, "R");
    Long len = Raf.length ();
    String lastline = "";
      if (len!=0l) {Long pos = len-1;
        while (pos > 0) {pos--;
        Raf.seek (POS);
          if (raf.readbyte () = = ' \ n ') {lastline = Raf.readline ();
          Lastline=new String (lastline.getbytes ("8859_1"), "GBK");
        Break
    }} raf.close ();
  return lastline; /** * Modify last line of data * @param fileName * @param laststring * @return * @throws ioexception/Private VO ID writelastline (String filename,string laststring) {try {//Open a random Access file stream, Randomaccessfile by read-write Randomfil
      E = new Randomaccessfile (fileName, "RW");
      File length, byte number long filelength = Randomfile.length ();
      Moves the write file pointer to the end of the file.
      Randomfile.seek (filelength); You must add GBK here, otherwise there will be write garbled randomfile.write (laststring.getbytes ("GBK"));
    Randomfile.close ();
    catch (IOException e) {log.error (E.getmessage ()); }/** * Left padding * * @param str * @param length * @param ch * @return/public static String
    Pad (String str, int length, char ch) {if (str.length () >= length) {return str;
    } char[] CHS = new Char[length];
    Arrays.fill (CHS, CH);
    char[] src = str.tochararray ();
    System.arraycopy (SRC, 0, CHS, Length-src.length, src.length);
  return new String (CHS);
    /** * Merge File Line internal class */class Partfile {private String partfilename;
    Private String FirstRow;
    Private String Endrow;
 
    Private Boolean firstisfull;
    Public String Getpartfilename () {return partfilename;
    } public void Setpartfilename (String partfilename) {this.partfilename = Partfilename;
    Public String Getfirstrow () {return firstrow; } public void Setfirstrow (String firstrow) {This.firstrow = firsTrow;
    Public String Getendrow () {return endrow;
    } public void Setendrow (String endrow) {this.endrow = Endrow;
    public Boolean getfirstisfull () {return firstisfull;
    } public void Setfirstisfull (Boolean firstisfull) {this.firstisfull = Firstisfull; }
  }
 
}

That is the entire content of this article; I hope it helps you in learning Java programming.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.