Implementation of CStringListEx, a string list faster than BCB's TStringList (updated)

Source: Internet
Author: User

Because MFC's CStringList is too simple to process files efficiently, CStringListEx was written on top of CStringList by combining quicksort, binary search, and memory mapping. The class is highly efficient: reading a file of more than 100 MB takes about 20 s (including processing) with BCB's TStringList, but only 3-4 seconds with CStringListEx. You can try it out in your own project. Copyright: wxy3064one # 163.com

The source code is provided directly:

// .h file:

 

# Ifndef _ q_string_list_ex_h
# DEFINE _ q_string_list_ex_h

# If _ msc_ver> 1000
# Pragma once
# Endif // _ msc_ver> 1000

# Define use_win_memory_map

 

Enum file_line_type {flt_window = 0, flt_unix = 1, flt_apple = 2 };

 

Class cstringlistex: Public cstringlist
{
Bool m_bsorted;
Bool customfind (cstring value, Int & Index );
Public:
Cstringlistex (INT nblocksize = 10 );
Void sort ();
Void clear ();
Int indexof (cstring value );
Void add (const cstring & newstr );
Cstring operator [] (INT index );
Void loadfromfile (lpctstr lpszfilename );
Void savetofile (lpctstr lpszfilename );
};

# Endif

 

// .cpp file:

#include "stringlistex.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>

#ifndef USE_WIN_MEMORY_MAP
#include <fstream>
#include <locale>
#include <string>
using namespace std;
#endif

 

 

/*
Binary Search
*/

// Binary search over the ascending range alist[low..high] (both inclusive).
//
// compare_func(x, y) must return <0, 0 or >0 when x sorts before, equal to or
// after y (the strcmp convention).
//
// Returns true when an element equal to `value` exists; `index` then holds the
// position of a matching element (not necessarily the first one when there
// are duplicates). Returns false otherwise, with `index` set to the position
// at which `value` would have to be inserted to keep the range sorted.
template <typename T>
bool binsearch(T* alist, int low, int high, T value, int (*compare_func)(T, T), int& index)
{
    bool found = false;
    int l = low;
    int r = high;
    while (l <= r)
    {
        // Written as l + (r - l) / 2 so the midpoint cannot overflow int.
        const int mid = l + ((r - l) >> 1);
        const int c = (*compare_func)(alist[mid], value);
        if (c < 0)
        {
            l = mid + 1;
        }
        else if (c > 0)
        {
            r = mid - 1;
        }
        else
        {
            found = true;
            l = mid;
            break;
        }
    }
    index = l;
    return found;
}

/*
Insertion sort:
Consists of N-1 passes. For P = 1 through P = N-1, insertion sort ensures that the elements in positions 0 through P are in sorted order.
Type: internal sort
Complexity: O(N^2)
*/
// Any algorithm that sorts by exchanging adjacent elements requires O(N^2) time on average.
// Sorts a[0..n-1] in ascending order by insertion; does nothing for n <= 1.
template <typename T>
void insertionsort(T a[], int n)
{
    for (int p = 1; p < n; p++)
    {
        T tmp = a[p];
        int j = p;
        // `j > 0` must short-circuit, otherwise a[-1] is read when j reaches 0.
        for (; j > 0 && a[j - 1] > tmp; j--)
        {
            a[j] = a[j - 1];
        }
        a[j] = tmp;
    }
}

/*
Quicksort:
The algorithm consists of four steps:
1. If the number of elements in S is 0 or 1, return.
2. Pick any element v in S and call it the pivot.
3. Partition S - {v} into two disjoint sets: S1 = {x in S - {v} | x <= v} and
   S2 = {x in S - {v} | x >= v}.
4. Return quicksort(S1), followed by v, followed by quicksort(S2).

A safe policy is to pick the pivot at random, but generating random numbers is
expensive and does not reduce the average running time of the rest of the
algorithm. Median-of-three partitioning is recommended instead.
A quick-select can be built the same way: back up the array, add a rank
parameter k, recurse into the left part when k <= i and into the right part
otherwise, then read the element at index k.
Best: O(N log N)  Average: O(N log N)  Worst: O(N^2)
Type: internal sort
*/

// Exchanges a and b through a temporary copy.
template <typename T>
void swap_t(T& a, T& b)
{
    T c = a;
    a = b;
    b = c;
}

// Partitions the half-open range a[left..right-1] around its last element.
// Precondition: left < right. Returns the pivot's final index; elements before
// it compare less than the pivot.
template <typename T>
int partion(T a[], int left, int right)
{
    int j = left - 1;
    T s = a[--right]; // pivot; `right` now indexes the pivot slot
    for (int i = left; i < right; i++)
    {
        if (a[i] < s)
        {
            j++;
            swap_t(a[i], a[j]);
        }
    }
    j++;
    a[right] = a[j];
    a[j] = s;
    return j;
}

// Like partion(), but first moves a randomly chosen element of the half-open
// range a[left..right-1] into the pivot slot. Precondition: left < right.
template <typename T>
int randompartion(T a[], int left, int right)
{
    const int count = right - left;
    // With a single element there is nothing to choose, and the modulo below
    // would otherwise divide by zero.
    if (count > 1)
    {
        const int i = left + rand() % count;
        swap_t(a[i], a[right - 1]);
    }
    return partion(a, left, right);
}

// Median-of-three pivot selection on the inclusive range a[left..right].
// Orders a[left], a[center], a[right], then parks the median at a[right-1]
// and returns a copy of it.
template <typename T>
T median3(T a[], int left, int right)
{
    const int center = (left + right) / 2;

    if (a[left] > a[center])
        swap_t(a[left], a[center]);
    if (a[left] > a[right])
        swap_t(a[left], a[right]);
    if (a[center] > a[right])
        swap_t(a[center], a[right]);

    swap_t(a[center], a[right - 1]); /* hide the pivot */
    return a[right - 1];
}

// Sorts the inclusive range a[left..right] in ascending order.
//
// busedef == false (default): median-of-three quicksort that hands ranges of
// fewer than four elements to insertionsort().
// busedef == true: quicksort with a randomly chosen pivot. The bounds are
// inclusive in this mode too, and the flag is passed down the recursion.
template <typename T>
void quicksort(T a[], int left, int right, bool busedef = false)
{
    if (busedef)
    {
        if (left < right)
        {
            // randompartion() takes a half-open range, hence right + 1.
            const int p = randompartion(a, left, right + 1);
            quicksort(a, left, p - 1, true);
            quicksort(a, p + 1, right, true);
        }
        return;
    }

    const int cutoff = 3;
    if (left + cutoff > right)
    {
        insertionsort(a + left, right - left + 1);
        return;
    }

    const T pivot = median3(a, left, right);
    int i = left;
    int j = right - 1;
    for (;;)
    {
        // a[right-1] (the pivot) and a[left] (<= pivot) act as sentinels, so
        // neither scan can leave the range.
        while (a[++i] < pivot) {}
        while (a[--j] > pivot) {}
        if (i >= j)
            break;
        swap_t(a[i], a[j]);
    }
    swap_t(a[i], a[right - 1]); /* restore the pivot */
    quicksort(a, left, i - 1);
    quicksort(a, i + 1, right);
}

# Ifdef use_win_memory_map
Handle hthread;
DWORD dwthreadid;

Struct memorymapreadstruct
{
Cstringlistex * plist;
Lpvoid Buf;
Int filelinetype;
};

Void setstring (cstring & S, char * Buf, int Len)
{
If (BUF! = NULL)
{
Char * P = new char [Len + 1];
Memset (p, 0, Len + 1 );
Memcpy (p, Buf, Len-1 );
Memcpy (p + Len-1, BUF + Len-1, 1 );
S = P;
Delete [] P;
}
}

Int get_file_delim_type (const char * filename)
{
Int ret = flt_window;
Char tmpchar;
Char Buf [2] = {0 };
Int COUNT = sizeof (BUF );
Int ncount = 0;
File * fp = fopen (filename, "R ");
If (FP)
{
Fread (BUF, Count, 1, FP );
Tmpchar = Buf [0];
Ncount ++;

While (fread (BUF, Count, 1, FP) & ncount <4095)
{
If (tmpchar = 0x0d)
{
If (BUF [0] = 0x0a) // window type // 0x0d 0x0a
Ret = flt_window;
Else // UNIX type // 0x0d
Ret = flt_unix;
Break;
} Else {
If (tmpchar = 0x0a) // Apple Type // 0x0a
{
Ret = flt_apple;
Break;
}
}
Tmpchar = Buf [0];
Ncount ++;
}
Fclose (FP );
}
Return ret;
}

DWORD winapi loadstrings (lpvoid lpparam)
{
Cstring S;
Char * P, * start;
Memorymapreadstruct * preadparam = (memorymapreadstruct *) lpparam;

P = (char *) preadparam-> Buf;
Switch (preadparam-> filelinetype)
{
Case flt_apple:
Case flt_unix:
Case flt_window:
{
If (P! = NULL)
{
While (* P! = Char (0 ))
{
Start = P;
While (* P! = Char (0) & * P! = Char (0x0d) & * P! = Char (0x0a ))
P ++;
If (p-Start> 0)
{
Setstring (S, start, p-Start );
Preadparam-> plist-> Add (s );
}
If (* P = char (0x0d ))
P ++;
If (* P = char (0x0a ))
P ++;
}
}
} Break;
Default: break;
}

Return 0;
}
# Endif

Cstringlistex: cstringlistex (INT nblocksize): cstringlist (nblocksize)
{
M_bsorted = false;
}

Cstring cstringlistex: operator [] (INT index)
{
Return getat (findindex (INDEX ));
}

Bool cstringlistex: customfind (cstring value, Int & Index)
{
If (! M_bsorted)
{
Index = 0;
Position Pos = getheadposition ();
While (Pos! = NULL)
{
If (strcmp (maid) getat (POS), (maid) value) = 0)
{
Return true;
}
Getnext (POS );
Index ++;
}
Index =-1;
Return false;
} Else {
Int ncount = getcount ();
Position Pos = getheadposition ();
Cstring * ptrarray = new cstring [ncount];
For (INT I = 0; I <ncount; I ++)
{
Ptrarray [I] = getnext (POS). getbuffer (0 );
}
Bool ret = binsearch <const char *> (const char **) ptrarray, 0, nCount-1, (const char *) value, strcmp, index );
Delete [] ptrarray;
Return ret;
}
}
Int cstringlistex: indexof (cstring value)
{
Int Index = 0;
If (customfind (value, index ))
Return Index;
Return-1;
}

Void cstringlistex: add (const cstring & newstr)
{
Addtail (newstr );
M_bsorted = false;
}

Void cstringlistex: Sort ()
{
Int I;
Int ncount = getcount ();
Position Pos = getheadposition ();
Cstring * ptrarray = new cstring [ncount];
For (I = 0; I <ncount; I ++)
{
Ptrarray [I] = getnext (POS). getbuffer (0 );
}
Quicksort <cstring> (ptrarray, 0, nCount-1 );
Removeall ();
For (I = 0; I <ncount; I ++)
{
Add (ptrarray [I]);
}
Delete [] ptrarray;
M_bsorted = true;
}

Void cstringlistex: clear ()
{
Removeall ();
}

Void cstringlistex: savetofile (lpctstr lpszfilename)
{
File * file;
If (file = fopen (lpctstr) lpszfilename, "W +") = NULL)
{
Return;
}
Position Pos = getheadposition ();
While (Pos! = NULL)
{
Cstring STR = getnext (POS). getbuffer (0 );
Fwrite (lpctstr) STR, str. getlength (), 1, file );
}
Fclose (File );
}
Void cstringlistex: loadfromfile (lpctstr lpszfilename)
{
# Ifdef use_win_memory_map
Handle hfile = createfile (lpszfilename, generic_read | generic_write,
File_pai_read,
Null,
Open_existing,
File_attribute_normal,
Null );
If (hfile = invalid_handle_value)
{
Return;
}
Handle hfilemapping = createfilemapping (hfile, null, page_readwrite, 0, 0, null );
If (hfilemapping = NULL)
{
Closehandle (hfile );
Return;
}
Int filelinetype = get_file_delim_type (lpszfilename );
System_info Si;
Getsysteminfo (& Si );
DWORD dwbytesinblock = 1000 * si. dwallocationgranularity;
_ Int64 dwfileoffset = 0;
DWORD dwfilesizehigh;
_ Int64 dwfilesize = getfilesize (hfile, & dwfilesizehigh );
Dwfilesize | = (_ int64) dwfilesizehigh) <32 );
Closehandle (hfile );
Hfile = invalid_handle_value;
Bool bpmenough = false;
If (dwfilesize <dwbytesinblock)
{
Dwbytesinblock = dwfilesize;
Bpmenough = true;
}
If (! Bpmenough)
{
Dwbytesinblock = dwfilesize;
Bpmenough = true;
}
If (bpmenough)
{
Pbyte pbfile = (pbyte) mapviewoffile (hfilemapping, file_map_all_access,
(DWORD) (dwfileoffset> 32 ),
(DWORD) (dwfileoffset & 0 xffffffff ),
Dwbytesinblock );
Memorymapreadstruct thread_param;
Thread_param.buf = pbfile;
Thread_param.plist = this;
Thread_param.filelinetype = filelinetype;
Hthread = createthread (null, 0, loadstrings, (lpvoid) & thread_param, 0, & dwthreadid );
While (1 = 1)
{
DWORD dwret;
MSG;
Handle objarray [1];
Objarray [0] = hthread;
Dwret =: msgwaitformultipleobjects (1, objarray, false, infinite, qs_allinput );
If (dwret = wait_object_0)
{
Break;
}
Else
{
: Peekmessage (& MSG, null, 0, 0, pm_remove );
: Dispatchmessage (& MSG );
}
}
If (pbfile! = NULL)
Unmapviewoffile (pbfile );
}
If (hfilemapping! = NULL)
{
Closehandle (hfilemapping );
Hfilemapping = NULL;
}
# Else

// The deleted code is incorrect in vs2008. The main type is the peek function.
Fstream infile;
Char Buf [0x400];
Locale: Global (locale (""));
Infile. Open (lpszfilename, ios_base: In );
If (infile. is_open ())
{
While (infile. Peek ()! =-1)
{
Infile. Getline (BUF, 0x400 );
Add (BUF );
}
}
Infile. Close ();
Locale: Global (locale ("C "));

 

// The following code is the changed code

Char tmpchar;
Char Buf [2] = {0 };
Int COUNT = sizeof (BUF );
Int ncount = 0;
File * fp = fopen (lpszfilename, "R ");
If (FP)
{
Fread (BUF, Count, 1, FP );
Tmpchar = Buf [0];
Ncount ++;

While (fread (BUF, Count, 1, FP) & ncount <4095)
{
If (tmpchar = 0x0d)
{
Break;
} Else {
If (tmpchar = 0x0a) // Apple Type // 0x0a
Break;
}
Tmpchar = Buf [0];
Ncount ++;
}
Fclose (FP );
If (tmpchar = 0x0d | tmpchar = 0x0a)
{
Fstream infile;
Char Buf [0x400];
Locale: Global (locale (""));
Infile. Open (lpszfilename, ios_base: In );
If (infile. is_open ())
{
While (infile. Getline (BUF, 0x400, tmpchar ))
{
Add (BUF );
}
}
Infile. Close ();
Locale: Global (locale ("C "));
}
}

# Endif
}

 

Follow-up:

I provide this class so that MFC programs can use a string list more efficient than Borland's TStringList. The memory-mapping code above can still be improved, for example by reading the file in blocks.

 

Author's statement:
My ddddfw888 is copyrighted. For more information, see the source.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.