Java: writing, reading, splitting, sorting, and merging big data files

Source: Internet
Author: User

package arrays.file;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Random;

/**
 * External-sort demo: writes a large file of random integers, splits it
 * round-robin into part files, sorts every part in memory and finally merges
 * the sorted parts into one fully ordered output file.
 *
 * All files hold one decimal integer per line, terminated by CR LF.
 */
public class ConcludeCombinationSortWrite {

    /** Path suffix of the unsorted data file inside the working directory. */
    private static final String DATA_FILE = "//12114.txt";

    /** Path suffix of the merged, sorted output file inside the working directory. */
    private static final String SORTED_FILE = "//sort.txt";

    /** Line terminator used by every file this class writes. */
    private static final String LINE_END = "\r\n";

    /**
     * Runs the whole pipeline: generate, split, sort each part, merge.
     *
     * @param args unused
     * @throws IOException if any of the files cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        // Directory of the file to be written
        String filePath = "D://456";
        // Directory of the part files
        String sqlitFilePath = "D://456//123";

        // Number of values to generate
        int countNumbers = 10000000;

        // Number of part files
        int countFile = 10;

        // Batch size of the former approximate merge; still passed for signature compatibility
        int countAccuracy = 30 * countFile;

        long startNumber = System.currentTimeMillis();

        // Write the big data file
        writeData(filePath, countNumbers);
        System.out.println("Storage completed ");

        // Split the big data file into the part files
        sqlitFileDate(filePath, sqlitFilePath, countFile);
        System.out.println("the file has been cut! ");

        // Sort the data of each part file
        singleFileDataSort(sqlitFilePath, countFile);
        System.out.println("sorting of each sub-file is complete! ");

        // Merge the sorted part files into the final output
        deathDataFile(filePath, sqlitFilePath, countAccuracy, countFile);
        System.out.println("integrated ");

        long stopNumber = System.currentTimeMillis();
        // The difference of two currentTimeMillis() readings is already in milliseconds.
        System.out.println("Time consumed " + (stopNumber - startNumber) + " milliseconds");
    }

    /**
     * Writes {@code countNumbers} random non-negative integers, one per line,
     * to the data file inside {@code path}.
     *
     * @param path         directory that receives the data file
     * @param countNumbers how many values to write; nothing is written for values &lt;= 0
     * @throws IOException if the file cannot be created or written
     */
    public static void writeData(String path, int countNumbers) throws IOException {
        // One generator for the whole run instead of a new one per value.
        Random random = new Random();
        try (BufferedWriter out = new BufferedWriter(new FileWriter(path + DATA_FILE))) {
            for (int i = 0; i < countNumbers; i++) {
                out.write(random.nextInt(Integer.MAX_VALUE) + LINE_END);
            }
        }
    }

    /**
     * Distributes the lines of the data file round-robin over {@code countFile}
     * part files: line 1 goes to part 1, line 2 to part 2, and so on, wrapping
     * around after the last part.
     *
     * @param filePath  directory containing the data file
     * @param sqlitPath directory that receives the part files
     * @param countFile number of part files; must be positive
     * @throws IOException              if a file cannot be read or written
     * @throws IllegalArgumentException if {@code countFile} is not positive
     */
    public static void sqlitFileDate(String filePath, String sqlitPath,
            int countFile) throws IOException {
        if (countFile <= 0) {
            throw new IllegalArgumentException("countFile must be positive: " + countFile);
        }
        final List<BufferedWriter> writers = new ArrayList<>(countFile);
        // The first resource closes every writer opened so far, even when a later open or write fails.
        try (Closeable closeWriters = () -> closeAll(writers);
                BufferedReader in = new BufferedReader(new FileReader(filePath + DATA_FILE))) {
            for (int j = 1; j <= countFile; j++) {
                writers.add(new BufferedWriter(new FileWriter(partPath(sqlitPath, j), false)));
            }
            int target = 0;
            String line;
            while ((line = in.readLine()) != null) {
                writers.get(target).write(line + LINE_END);
                target = (target + 1) % countFile;
            }
        }
    }

    /**
     * Sorts every part file in ascending order, in place.
     *
     * @param path1     directory containing the part files
     * @param countFile number of part files
     * @throws IOException if a part file cannot be read or written, or contains
     *                     a line that is not an integer
     */
    public static void singleFileDataSort(String path1, int countFile) throws IOException {
        for (int i = 1; i <= countFile; i++) {
            String path = partPath(path1, i);
            // The reader is closed before the same file is reopened for writing.
            List<Integer> numbers = readNumbers(path);
            Collections.sort(numbers);
            writeLines(numbers, path);
        }
    }

    /**
     * Writes every element of {@code list}, one per line, to {@code path},
     * replacing any previous content.
     *
     * @param list values to write, in iteration order
     * @param path file to (over)write
     * @throws UncheckedIOException if the file cannot be written
     */
    public static void numberSort(LinkedList<?> list, String path) {
        try {
            writeLines(list, path);
        } catch (IOException e) {
            // The signature has no throws clause, so report the failure unchecked instead of hiding it.
            throw new UncheckedIOException("Failed to write " + path, e);
        }
    }

    /**
     * Merges the sorted part files into the sorted output file inside
     * {@code filePath}. The merge is an exact k-way merge: the smallest pending
     * value of all parts is written next, so the output is fully ordered
     * provided every part file is itself sorted.
     *
     * @param filePath       directory that receives the output file
     * @param sqlitFilePath1 directory containing the sorted part files
     * @param countAccuracy  unused; kept for signature compatibility
     * @param countFile      number of part files
     * @throws IOException if a file cannot be read or written, or a part file
     *                     contains a line that is not an integer
     */
    public static void deathDataFile(String filePath, String sqlitFilePath1,
            int countAccuracy, int countFile) throws IOException {
        final List<BufferedReader> readers = new ArrayList<>();
        // The first resource closes every reader opened so far, even when a later open or read fails.
        try (Closeable closeReaders = () -> closeAll(readers);
                BufferedWriter out = new BufferedWriter(new FileWriter(filePath + SORTED_FILE, false))) {
            PriorityQueue<Head> heads = new PriorityQueue<>();
            for (int j = 1; j <= countFile; j++) {
                String path = partPath(sqlitFilePath1, j);
                BufferedReader reader = new BufferedReader(new FileReader(path));
                readers.add(reader);
                Integer first = nextNumber(reader, path);
                if (first != null) {
                    heads.add(new Head(reader, path, first));
                }
            }
            while (!heads.isEmpty()) {
                Head smallest = heads.poll();
                out.write(smallest.value + LINE_END);
                // Refill from the part that just supplied a value; exhausted parts drop out.
                Integer next = nextNumber(smallest.reader, smallest.path);
                if (next != null) {
                    smallest.value = next;
                    heads.add(smallest);
                }
            }
        }
    }

    /**
     * Writes the leading elements of {@code list} to {@code fs} and returns the
     * element that was held back, if any.
     *
     * When {@code isPass} is true and the list has at most {@code countAccuracy}
     * elements, every element is written and {@code null} is returned. Otherwise
     * all elements but the last are written and the last one is returned. An
     * empty list writes nothing and returns {@code null}.
     *
     * This method is no longer used by {@link #deathDataFile}; it is kept for
     * existing callers.
     *
     * @param list          values to write, in iteration order
     * @param filePath      unused; kept for signature compatibility
     * @param isPass        whether a short list may be written completely
     * @param countAccuracy largest list size that is written completely when {@code isPass} is true
     * @param fs            destination writer; not closed by this method
     * @return the element that was not written, or {@code null} if none was held back
     * @throws UncheckedIOException if writing fails
     */
    public static Object numberSortData(LinkedList<?> list, String filePath,
            boolean isPass, int countAccuracy, BufferedWriter fs) {
        if (list.isEmpty()) {
            return null;
        }
        boolean writeEverything = isPass && list.size() <= countAccuracy;
        int toWrite = writeEverything ? list.size() : list.size() - 1;
        Object carried = null;
        try {
            int written = 0;
            for (Object value : list) {
                if (written == toWrite) {
                    carried = value;
                    break;
                }
                fs.write(value + LINE_END);
                written++;
            }
        } catch (IOException e) {
            // The signature has no throws clause, so report the failure unchecked instead of hiding it.
            throw new UncheckedIOException("Failed to write sorted data", e);
        }
        return carried;
    }

    /** Builds the path of part file number {@code index} inside {@code directory}. */
    private static String partPath(String directory, int index) {
        return directory + "//12" + index + ".txt";
    }

    /** Reads every integer of the file at {@code path} into a list, in file order. */
    private static List<Integer> readNumbers(String path) throws IOException {
        List<Integer> numbers = new ArrayList<>();
        try (BufferedReader in = new BufferedReader(new FileReader(path))) {
            Integer value;
            while ((value = nextNumber(in, path)) != null) {
                numbers.add(value);
            }
        }
        return numbers;
    }

    /**
     * Returns the next integer from {@code in}, skipping blank lines, or
     * {@code null} at end of input. {@code origin} only labels the error message.
     */
    private static Integer nextNumber(BufferedReader in, String origin) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            String text = line.trim();
            if (text.isEmpty()) {
                continue;
            }
            try {
                return Integer.valueOf(text);
            } catch (NumberFormatException e) {
                throw new IOException("Malformed number \"" + text + "\" in " + origin, e);
            }
        }
        return null;
    }

    /** Writes each value on its own line to {@code path}, replacing any previous content. */
    private static void writeLines(Iterable<?> values, String path) throws IOException {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(path))) {
            for (Object value : values) {
                out.write(value + LINE_END);
            }
        }
    }

    /**
     * Closes every resource; the first failure is rethrown after all have been
     * attempted, with later failures attached as suppressed exceptions.
     */
    private static void closeAll(List<? extends Closeable> resources) throws IOException {
        IOException first = null;
        for (Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                } else {
                    first.addSuppressed(e);
                }
            }
        }
        if (first != null) {
            throw first;
        }
    }

    /** Pending smallest value of one part file during the merge, ordered by that value. */
    private static final class Head implements Comparable<Head> {
        private final BufferedReader reader;
        private final String path;
        private int value;

        Head(BufferedReader reader, String path, int value) {
            this.reader = reader;
            this.path = path;
            this.value = value;
        }

        @Override
        public int compareTo(Head other) {
            return Integer.compare(value, other.value);
        }
    }
}

Related Article

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email; we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.