C # Fast and random reading of large text files by line

Source: Internet
Author: User

The following is a random-access reading class for data files that I implemented. It can read an arbitrary line of a large text file. On my machine, the time to read a random line from a large (multi-hundred-MB, roughly 200,000-line) text file dropped from hundreds of milliseconds to about 3 ms.
Text files are usually read line by line with ReadLine(). For that, the FileStream and BufferedStream classes in C# are more than capable: they do not read the whole file at once, but read through a buffer. However, if you want to read an arbitrary line at random, and line lengths are inconsistent, scanning line by line to reach the target line is clearly inefficient.
Of course, there is a cost: it adds a one-time indexing pass when the file is first opened and uses a small amount of memory. The amount used is configurable — the less memory used, the slower random reads become — but even the maximum is far smaller than reading the whole file into memory.

(Partially rewrite the network code)

using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Threading;
using System.IO;

namespace DataBuffer
{
    public static class FileConfig
    {
        public static int STREAM_BUFFER_SIZE = 1024000;
        public static int MAP_DISTANCE = 10;
    }

/// <summary>
/// Metadata for one data file: its name, parsed header fields, total line
/// count, and a sparse map of byte offsets used for fast random line access.
/// </summary>
/// <remarks>
/// Reconstructed from a machine-mangled listing: the original text capitalized
/// keywords and collided property names with their backing fields
/// (e.g. <c>head</c>/<c>head</c>), which cannot compile. Members are now
/// PascalCase with distinct backing fields.
/// </remarks>
public class DataFile
{
    /// <summary>Data file name (path).</summary>
    public string FileName = "";

    /// <summary>True once the background indexing pass has finished.</summary>
    public bool Done = false;

    /// <summary>Current stream position (byte offset) advanced while indexing.</summary>
    public long Position = 0;

    // Backing store for Head.
    private Hashtable head = new Hashtable();
    /// <summary>File header information (key/value pairs parsed from the first lines).</summary>
    public Hashtable Head { get { return head; } set { head = value; } }

    // Backing store for Map.
    private ArrayList map = new ArrayList();
    /// <summary>File map: byte offset of every FileConfig.MAP_DISTANCE-th line (boxed longs).</summary>
    public ArrayList Map { get { return map; } set { map = value; } }

    // Backing store for Lines.
    private long lines = 0;
    /// <summary>Number of lines in the file (valid once Done is true).</summary>
    public long Lines { get { return lines; } set { lines = value; } }
}

/// <summary>
/// Fast random access to lines of a large text file. On open, a background
/// thread builds a sparse index (one byte offset every
/// FileConfig.MAP_DISTANCE lines); ReadLine(n) then seeks to the nearest
/// recorded offset and skips at most MAP_DISTANCE - 1 lines.
/// </summary>
/// <remarks>
/// Reconstructed from a machine-mangled listing. Concrete fixes beyond
/// casing/syntax repair:
///  - the map-building condition used assignment (<c>= 0</c>) instead of
///    equality and had unbalanced parentheses;
///  - NextLine dereferenced a null ReadLine() result at end of stream;
///  - the map offset is unboxed directly instead of round-tripping
///    through long.Parse(obj.ToString()).
/// </remarks>
public class DataBuffer
{
    private FileStream fs = null;
    private BufferedStream bs = null;
    private StreamReader sr = null;
    private StreamWriter sw = null;

    /// <summary>File information data structure (header, line map, state).</summary>
    public DataFile DataFile = new DataFile();

    /// <summary>Creates a buffer for the named file; call Open() to start indexing.</summary>
    public DataBuffer(string name)
    {
        DataFile.FileName = name;
    }

    /// <summary>
    /// Opens the file and starts the background indexing thread.
    /// Returns false (after reporting the error) on failure.
    /// </summary>
    public bool Open()
    {
        try
        {
            // Initialize each stream over the same underlying file.
            fs = new FileStream(DataFile.FileName, FileMode.Open, FileAccess.ReadWrite);
            bs = new BufferedStream(fs, FileConfig.STREAM_BUFFER_SIZE);
            sr = new StreamReader(fs);
            sw = new StreamWriter(fs);

            // Build the header/map index without blocking the caller.
            Thread initFile = new Thread(new ThreadStart(InitDataFile));
            initFile.Start();
            return true;
        }
        catch (Exception ee)
        {
            // NOTE(review): ErrorHandler is a project-local type whose exact
            // name/signature was garbled in the source listing — confirm.
            ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "Open File");
            return false;
        }
    }

    /// <summary>
    /// Background pass: parses the 14-line header, then walks the whole file
    /// recording the byte offset of every MAP_DISTANCE-th line into the map.
    /// </summary>
    private void InitDataFile()
    {
        // A second reader over the shared FileStream, so indexing has its own
        // buffering. Position tracking is done manually in NextLine.
        BufferedStream indexBs = new BufferedStream(fs);
        StreamReader indexSr = new StreamReader(indexBs);

        // Read the data file header information: 14 lines in total.
        // Substring offsets skip the fixed label prefix on each header line
        // (e.g. "Subject...: "). NOTE(review): offsets assume a specific file
        // format not visible here — confirm against a sample file.
        string thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("subject", thisLine.Substring(11));

        thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("date", thisLine.Substring(8));

        thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("time", thisLine.Substring(8));

        thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("channels", thisLine.Substring(12));

        thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("rate", thisLine.Substring(8));

        thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("type", thisLine.Substring(8));

        thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("rows", thisLine.Substring(8));

        // One filler line, then the electrode-label line.
        thisLine = NextLine(ref indexSr);
        thisLine = NextLine(ref indexSr);
        DataFile.Head.Add("electrode labels", thisLine);

        // Skip the remaining five header lines.
        thisLine = NextLine(ref indexSr);
        thisLine = NextLine(ref indexSr);
        thisLine = NextLine(ref indexSr);
        thisLine = NextLine(ref indexSr);
        thisLine = NextLine(ref indexSr);

        // Lower this thread's priority so indexing does not starve readers.
        Thread.CurrentThread.Priority = ThreadPriority.BelowNormal;

        // Line counter for building the map.
        long lines = 1;
        // Record the position of the first data line.
        DataFile.Map.Add(DataFile.Position);
        // Walk the rest of the file, recording every MAP_DISTANCE-th offset.
        while (!indexSr.EndOfStream)
        {
            thisLine = NextLine(ref indexSr);
            // Fixed: source had '= 0' (assignment) with unbalanced parens.
            if ((++lines) % FileConfig.MAP_DISTANCE == 0)
            {
                DataFile.Map.Add(DataFile.Position);
            }
        }
        DataFile.Lines = lines;
        DataFile.Done = true;
    }

    /// <summary>
    /// Closes all streams. Returns false (after reporting the error) on failure.
    /// </summary>
    public bool Close()
    {
        try
        {
            // Close each stream in sequence, writer first.
            sw.Close();
            sr.Close();
            bs.Close();
            fs.Close();
            return true;
        }
        catch (Exception ee)
        {
            ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "Close File");
            return false;
        }
    }

    /// <summary>
    /// Reads the next line sequentially and advances DataFile.Position.
    /// Inefficient — used only once, during the indexing pass at open time.
    /// </summary>
    /// <param name="reader">Reader to pull the line from.</param>
    /// <returns>The line read, or null at end of stream.</returns>
    public string NextLine(ref StreamReader reader)
    {
        string next = reader.ReadLine();
        if (next == null)
        {
            // Fixed: the original dereferenced next.Length here and threw
            // NullReferenceException at end of stream.
            return null;
        }
        // +2 accounts for the Windows CR+LF terminator; use +1 on Unix.
        // NOTE(review): lines ending in bare LF will skew Position — confirm
        // the data files are CRLF-terminated.
        DataFile.Position += next.Length + 2;
        return next;
    }

    /// <summary>
    /// Returns the content of the given data line (counted from the first
    /// data line after the header), or "" if indexing has not finished or an
    /// error occurs.
    /// </summary>
    public string ReadLine(long line)
    {
        try
        {
            if (!DataFile.Done)
            {
                // Index not built yet; caller must retry later.
                return "";
            }
            // Index of the nearest preceding map entry.
            int index = (int)(line / FileConfig.MAP_DISTANCE);
            // Seek the buffered stream to that recorded offset.
            // Fixed: unbox the stored long directly instead of
            // long.Parse(obj.ToString()).
            bs.Seek((long)DataFile.Map[index], SeekOrigin.Begin);
            // Fresh reader: the old one's internal buffer is stale after Seek.
            sr = new StreamReader(bs);
            // Skip forward the remaining lines inside this map block.
            for (int i = 1; i <= (line - index * FileConfig.MAP_DISTANCE); i++)
            {
                sr.ReadLine();
            }
            // Return the value of the target line.
            return sr.ReadLine();
        }
        catch (Exception ee)
        {
            ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "File Read");
            return "";
        }
    }
}

}

C # Fast and random reading of large text files by line

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.