Use Java to split and merge large files

Source: Internet
Author: User

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Random;

/**
 * External sort demo: generates a large file of random integers, splits it
 * into several part files, sorts every part in memory, and merges the sorted
 * parts into one fully sorted output file.
 *
 * <p>File layout (one integer per line, lines end with CRLF):
 * <ul>
 *   <li>{@code <dir>/12114.txt} - the unsorted source data;</li>
 *   <li>{@code <splitDir>/12<i>.txt} - part file number {@code i} (1-based);</li>
 *   <li>{@code <dir>/Sort.txt} - the merged, sorted result.</li>
 * </ul>
 *
 * <p>Note: the listing was originally published with {@code package com.test;}.
 * It is kept in the default package here so it compiles standalone; add the
 * package declaration matching your source tree when copying it into a project.
 */
public class LargeMappedFiles {

    /** Name of the unsorted source file inside the working directory. */
    private static final String SOURCE_FILE_NAME = "12114.txt";

    /** Name of the merged output file inside the working directory. */
    private static final String MERGED_FILE_NAME = "Sort.txt";

    /** Line terminator used by every file this class writes. */
    private static final String LINE_END = "\r\n";

    /**
     * Runs the whole pipeline: generate, split, sort parts, merge.
     *
     * @param args optional; {@code args[0]} is the working directory (default
     *     {@code D:\456}) and {@code args[1]} the directory for part files
     *     (default {@code <working directory>/123})
     * @throws IOException if a directory cannot be created or any file cannot
     *     be read or written
     */
    public static void main(String[] args) throws IOException {
        String filePath = "D:\\456";
        String sqlitFilePath = "D:\\456\\123";
        if (args != null && args.length > 0) {
            filePath = args[0];
            sqlitFilePath = args.length > 1 ? args[1] : new File(filePath, "123").getPath();
        }
        int countNumbers = 10000000;
        int countFile = 10;
        // Retained only because deathDataFile still accepts it; see that method.
        int countAccuracy = 30 * countFile;

        // FileWriter does not create missing parent directories.
        ensureDirectory(filePath);
        ensureDirectory(sqlitFilePath);

        long startNumber = System.currentTimeMillis();

        WriteData(filePath, countNumbers);
        System.out.println("Storage completed");

        sqlitFileDate(filePath, sqlitFilePath, countFile);
        System.out.println("the file has been cut!");

        singleFileDataSort(sqlitFilePath, countFile);
        System.out.println("sorting of each sub-file is complete!");

        deathDataFile(filePath, sqlitFilePath, countAccuracy, countFile);
        System.out.println("completed integration");

        long stopNumber = System.currentTimeMillis();
        // Report the raw difference so the value really is in milliseconds.
        System.out.println("Time consumed" + (stopNumber - startNumber) + "millisecond");
    }

    /**
     * Writes {@code CountNumbers} random non-negative integers, one per line,
     * to {@code 12114.txt} inside {@code path}, replacing any existing file.
     *
     * @param path directory that receives the source file; must already exist
     * @param CountNumbers how many values to write; zero or less writes an empty file
     * @throws IOException if the file cannot be created or written
     */
    public static void WriteData(String path, int CountNumbers) throws IOException {
        Random random = new Random(); // one generator for the whole run
        File target = new File(path, SOURCE_FILE_NAME);
        try (BufferedWriter out = new BufferedWriter(new FileWriter(target))) {
            for (int i = 0; i < CountNumbers; i++) {
                out.write(Integer.toString(random.nextInt(Integer.MAX_VALUE)));
                out.write(LINE_END);
            }
        }
    }

    /**
     * Distributes the lines of {@code <filepath>/12114.txt} round-robin over
     * {@code CountFile} part files named {@code 12<i>.txt} (i = 1..CountFile)
     * inside {@code sqlitPath}. Existing part files are overwritten.
     *
     * @param filepath directory containing the source file
     * @param sqlitPath directory that receives the part files; must already exist
     * @param CountFile number of part files to produce; must be at least 1
     * @throws IllegalArgumentException if {@code CountFile} is less than 1
     * @throws IOException if the source cannot be read or a part cannot be written
     */
    public static void sqlitFileDate(String filepath, String sqlitPath, int CountFile)
            throws IOException {
        if (CountFile < 1) {
            // With no target files the lines could never be consumed.
            throw new IllegalArgumentException("CountFile must be at least 1, got " + CountFile);
        }
        List<BufferedWriter> writers = new ArrayList<>(CountFile);
        Throwable primary = null;
        File source = new File(filepath, SOURCE_FILE_NAME);
        try (BufferedReader in = new BufferedReader(new FileReader(source))) {
            for (int j = 1; j <= CountFile; j++) {
                writers.add(new BufferedWriter(new FileWriter(partFile(sqlitPath, j), false)));
            }
            int target = 0;
            String line;
            // readLine() == null is the only reliable end-of-file signal.
            while ((line = in.readLine()) != null) {
                BufferedWriter out = writers.get(target);
                out.write(line);
                out.write(LINE_END);
                target = (target + 1) % CountFile;
            }
        } catch (IOException | RuntimeException e) {
            primary = e;
            throw e;
        } finally {
            closeAll(writers, primary);
        }
    }

    /**
     * Sorts every part file {@code 12<i>.txt} (i = 1..CountFile) in
     * {@code path1} in ascending order, rewriting each file in place.
     * A part is loaded completely into memory, so each one must fit in the heap.
     * Blank lines are ignored.
     *
     * @param path1 directory containing the part files
     * @param CountFile number of part files
     * @throws IOException if a part cannot be read or written, or contains a
     *     line that is not a valid integer
     */
    public static void singleFileDataSort(String path1, int CountFile) throws IOException {
        for (int i = 1; i <= CountFile; i++) {
            File part = partFile(path1, i);
            List<Integer> values = new ArrayList<>();
            try (BufferedReader in = new BufferedReader(new FileReader(part))) {
                Integer value;
                while ((value = readNextValue(in, part)) != null) {
                    values.add(value);
                }
            }
            // The reader is closed before the same file is reopened for writing.
            Collections.sort(values);
            writeLines(values, part);
        }
    }

    /**
     * Writes every element of {@code list}, in iteration order and one per
     * line, to the file at {@code path}, replacing its previous content.
     * The list is written as given; it is not sorted by this method.
     *
     * @param list values to write
     * @param path full path of the target file
     * @throws UncheckedIOException if the file cannot be written
     */
    public static void numberSort(LinkedList<?> list, String path) {
        try {
            writeLines(list, new File(path));
        } catch (IOException e) {
            // The signature has no throws clause; fail loudly instead of
            // leaving a silently truncated file behind.
            throw new UncheckedIOException("Failed to write " + path, e);
        }
    }

    /**
     * Merges the sorted part files {@code 12<i>.txt} (i = 1..CountFile) from
     * {@code sqlitFilePath1} into {@code <filepath>/Sort.txt}.
     *
     * <p>This is a k-way merge: the smallest unread value of every part sits
     * in a priority queue, so the output is fully sorted provided each part is
     * sorted, and only one value per part is held in memory. Blank lines in
     * the parts are ignored.
     *
     * @param filepath directory that receives {@code Sort.txt}
     * @param sqlitFilePath1 directory containing the sorted part files
     * @param countAccuracy retained for signature compatibility; the merge is
     *     exact, so this value no longer influences the result
     * @param CountFile number of part files; zero or less yields an empty output
     * @throws IOException if a part cannot be read, contains a malformed line,
     *     or the output cannot be written
     */
    public static void deathDataFile(String filepath, String sqlitFilePath1, int countAccuracy,
            int CountFile) throws IOException {
        List<BufferedReader> readers = new ArrayList<>();
        Throwable primary = null;
        File merged = new File(filepath, MERGED_FILE_NAME);
        try (BufferedWriter out = new BufferedWriter(new FileWriter(merged, false))) {
            PriorityQueue<PartCursor> heads = new PriorityQueue<>();
            for (int j = 1; j <= CountFile; j++) {
                File part = partFile(sqlitFilePath1, j);
                BufferedReader reader = new BufferedReader(new FileReader(part));
                readers.add(reader);
                PartCursor cursor = new PartCursor(reader, part);
                if (cursor.advance()) { // empty parts never enter the queue
                    heads.add(cursor);
                }
            }
            while (!heads.isEmpty()) {
                PartCursor smallest = heads.poll();
                out.write(Integer.toString(smallest.current));
                out.write(LINE_END);
                if (smallest.advance()) {
                    heads.add(smallest);
                }
            }
        } catch (IOException | RuntimeException e) {
            primary = e;
            throw e;
        } finally {
            closeAll(readers, primary);
        }
    }

    /**
     * Legacy batch helper, kept for callers of the earlier batch-based merge.
     * Writes the elements of {@code list} in iteration order to {@code fs}
     * and returns the element that was held back, if any.
     *
     * <ul>
     *   <li>Empty list: nothing is written and {@code Integer} 0 is returned.</li>
     *   <li>{@code ispass} is true and the list has at most
     *       {@code countAccuracy} elements: every element is written and
     *       {@code null} is returned.</li>
     *   <li>Otherwise: every element except the last is written and the last
     *       element is returned.</li>
     * </ul>
     *
     * @param list values to write, expected to be sorted by the caller
     * @param filePath unused; retained for signature compatibility
     * @param ispass whether a short list should be written out completely
     * @param countAccuracy size threshold for writing the list completely
     * @param fs destination writer; it is neither flushed nor closed here
     * @return the held-back last element, {@code null}, or 0 as described above
     * @throws UncheckedIOException if writing to {@code fs} fails
     */
    public static Object numberSortData(LinkedList<?> list, String filePath, boolean ispass,
            int countAccuracy, BufferedWriter fs) {
        if (list.isEmpty()) {
            return 0;
        }
        // "size <= countAccuracy" equals "size < countAccuracy + 1" without overflow.
        boolean writeEverything = ispass && list.size() <= countAccuracy;
        int toWrite = writeEverything ? list.size() : list.size() - 1;
        Object carried = null;
        try {
            int written = 0;
            for (Object value : list) {
                if (written == toWrite) {
                    carried = value; // the last element is held back
                    break;
                }
                fs.write(value + LINE_END);
                written++;
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to write merged data", e);
        }
        return carried;
    }

    /** Returns the part file number {@code index} inside {@code directory}. */
    private static File partFile(String directory, int index) {
        return new File(directory, "12" + index + ".txt");
    }

    /** Creates {@code directory} (and missing parents) unless it already exists. */
    private static void ensureDirectory(String directory) throws IOException {
        File dir = new File(directory);
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create directory: " + dir);
        }
    }

    /** Writes every value on its own CRLF-terminated line, replacing {@code target}. */
    private static void writeLines(Iterable<?> values, File target) throws IOException {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(target))) {
            for (Object value : values) {
                out.write(String.valueOf(value));
                out.write(LINE_END);
            }
        }
    }

    /**
     * Reads the next non-blank line and parses it as an integer; returns
     * {@code null} at end of file.
     */
    private static Integer readNextValue(BufferedReader reader, File source) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            String text = line.trim();
            if (text.isEmpty()) {
                continue;
            }
            try {
                return Integer.valueOf(text);
            } catch (NumberFormatException e) {
                throw new IOException("Malformed integer \"" + text + "\" in " + source, e);
            }
        }
        return null;
    }

    /**
     * Closes every resource. Close failures are attached to {@code primary}
     * when one exists, so the original error is not masked; otherwise the
     * first close failure is thrown.
     */
    private static void closeAll(List<? extends Closeable> resources, Throwable primary)
            throws IOException {
        IOException closeFailure = null;
        for (Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (primary != null) {
                    primary.addSuppressed(e);
                } else if (closeFailure == null) {
                    closeFailure = e;
                } else {
                    closeFailure.addSuppressed(e);
                }
            }
        }
        if (closeFailure != null) {
            throw closeFailure;
        }
    }

    /**
     * Read position in one sorted part file; ordered by the current value so
     * a priority queue always yields the part holding the smallest value.
     */
    private static final class PartCursor implements Comparable<PartCursor> {
        private final BufferedReader reader;
        private final File file;
        private int current;

        PartCursor(BufferedReader reader, File file) {
            this.reader = reader;
            this.file = file;
        }

        /** Loads the next value into {@code current}; returns false at end of file. */
        boolean advance() throws IOException {
            Integer next = readNextValue(reader, file);
            if (next == null) {
                return false;
            }
            current = next;
            return true;
        }

        @Override
        public int compareTo(PartCursor other) {
            return Integer.compare(current, other.current);
        }
    }
}

Contact Us

The content source of this page is from the Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email; we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support, 6 Free Tickets per Quarter, Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.