Next: 108 Odd Tricks for Linux Programming, Part 7 (Lock-Free Experiment)
This section adds the test_and_set primitive and a complete spinlock implementation (modeled on the nginx spinlock). The main change is the insertion of the `__asm__("pause")` instruction: the number of pauses inserted grows as a power of two with the number of failed lock attempts, which helps reduce the number of retries. With this change, we can see that CPU usage has been reduced. The test_and_set instruction will be discussed further in future posts. CAS, TAS, and fetch_and_add are the only three atomic operations required in this series.
The semantics of test_and_set are: given an address, unconditionally set the value at that address to 1 and return the value it held before. If the original value was 1, 1 is returned; if it was not 1, the original value (0) is returned. This gives the effect of closing a door behind you: if the variable was not 1, the caller enters the critical section and the variable is set automatically, so the door is closed. All subsequent attempts fail, because they spin in `while (TAS(v) == 1)`. After the critical section has been used, the value is set back to zero (reset), and one of the spinning threads will then acquire it. Which thread wins is not controlled by any scheduling policy, however, so starvation may occur, and the higher the concurrency, the more likely starvation becomes; the spin is therefore made progressively less frequent (back-off). This article caps the back-off at 4096, which can be adjusted according to the actual environment; see the nginx source code.
The following is the experiment code:
/*************************************************************************
// Compilation:
// g++ test.cpp -o test_cas_imp -Dcas_imp -lpthread
// g++ test.cpp -o test_cas_imp_all -Dcas_imp_all -lpthread
// g++ test.cpp -o test_tas_imp -Dtas_imp -lpthread
// g++ test.cpp -o test_tas_imp_all -Dtas_imp_all -lpthread
**************************************************************************/
#include <pthread.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
/*
 * Operand-size suffix appended to the cmpxchg mnemonic so the instruction
 * width matches size_t: "q" (64-bit) on x86-64, "l" (32-bit) otherwise.
 */
#if defined(__x86_64__)
#define atomicops_word_suffix "q"
#else
#define atomicops_word_suffix "l"
#endif

/*
 * Atomic compare-and-swap on a size_t.
 *
 * If *p equals val_old, val_new is stored into *p.
 *
 * Returns true when the swap happened, false when *p held a different value
 * (in which case *p is left unchanged).
 */
static inline bool compare_and_swap(volatile size_t *p, size_t val_old, size_t val_new)
{
#if defined(__x86_64__) || defined(__i386__)
    char ret;

    /*
     * cmpxchg compares the accumulator with *p. On a mismatch it loads *p
     * into the accumulator, so val_old must be declared in/out ("+a"), not
     * input-only. setz then captures the zero flag as the success result.
     */
    __asm__ __volatile__("lock; cmpxchg" atomicops_word_suffix " %3, %0; setz %1"
                         : "+m"(*p), "=q"(ret), "+a"(val_old)
                         : "r"(val_new)
                         : "memory", "cc");
    return ret != 0;
#else
    /* Non-x86 targets: fall back to the compiler's atomic builtin. */
    return __atomic_compare_exchange_n(p, &val_old, val_new, 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
#endif
}
/*
 * Atomic fetch-and-add on a size_t.
 *
 * Adds `add` to *p and returns the value *p held before the addition.
 * The full width of size_t is used; a 32-bit xaddl with an unsigned int
 * result would truncate the counter on 64-bit targets.
 */
static inline size_t fetch_and_add(volatile size_t *p, size_t add)
{
#if defined(__x86_64__) || defined(__i386__)
    /*
     * No mnemonic suffix: the assembler takes the operand size from the
     * register chosen for `add`. xadd leaves the old *p in that register.
     */
    __asm__ __volatile__("lock; xadd %0, %1"
                         : "+r"(add), "+m"(*p)
                         :
                         : "memory", "cc");
    return add;
#else
    /* Non-x86 targets: fall back to the compiler's atomic builtin. */
    return __atomic_fetch_add(p, add, __ATOMIC_SEQ_CST);
#endif
}
/*
 * Atomic test-and-set.
 *
 * Unconditionally stores 1 into *s and returns the value *s held before.
 * A return of 0 means the caller just took the lock; 1 means it was
 * already held.
 */
static inline int test_and_set(volatile int *s)
{
#if defined(__x86_64__) || defined(__i386__)
    int prev = 1;

    /* xchg with a memory operand is implicitly locked on x86. */
    __asm__ __volatile__("xchgl %0, %1"
                         : "+r"(prev), "+m"(*s)
                         :
                         : "memory");
    return prev;
#else
    /* Non-x86 targets: fall back to the compiler's atomic builtin. */
    return __atomic_exchange_n(s, 1, __ATOMIC_SEQ_CST);
#endif
}
/*
 * Releases a test-and-set lock by storing 0 into *s.
 *
 * The store uses release ordering so writes made inside the critical
 * section are visible before the lock appears free.
 *
 * Always returns 0. The function is declared int, and previously fell off
 * the end without returning a value.
 */
static inline int reset(volatile int *s)
{
    __atomic_store_n(s, 0, __ATOMIC_RELEASE);
    return 0;
}
/* Shared counter incremented by every worker thread. */
volatile size_t g_ucount;
/* Mutex available for a pthread-mutex variant; not referenced by the workers shown here. */
pthread_mutex_t g_tlck = PTHREAD_MUTEX_INITIALIZER;
/* Number of increments each worker thread performs. */
const size_t cnt_num = 10000000;
/* Test-and-set spinlock word: 0 = free, 1 = held. */
volatile int tas_lock = 0;
Void * sum_with_cas_imp (void *)
{
For (INT I = 0; I <cnt_num; ++ I ){
For (;;){
Size_t u = g_ucount;
If (compare_and_swap (& g_ucount, u, u + 1 )){
Break;
}
}
}
}
Void * sum_with_tas_imp (void *)
{
For (INT I = 0; I <cnt_num; ++ I ){
While (test_and_set (& tas_lock) = 1 ){}
++ G_ucount;
Reset (& tas_lock );
}
}
Void * sum_with_cas_imp_all (void *)
{
For (INT I = 0; I <cnt_num ;){
For (;;)
{
Size_t u = g_ucount;
If (compare_and_swap (& g_ucount, u, u + 1 )){
Goto L1;
}
For (size_t n = 1; n <4096; n <= 1 ){
For (size_t I = 0; I <n; I ++ ){
_ ASM _ ("pause ");
}
U = g_ucount;
If (compare_and_swap (& g_ucount, u, u + 1 )){
Goto L1;
}
}
Syscall (sys_sched_yield );
}
L1: ++ I;
}
}
// Reference: http://nginx.sourcearchive.com/documentation/0.7.59-1/ngx__spinlock_8c-source.html
Void * sum_with_tas_imp_all (void *)
{
For (INT I = 0; I <cnt_num; ++ I ){
For (size_t n = 1; (test_and_set (& tas_lock) = 1; n <= 1 ){
If (n <4096 ){
For (size_t I = 0; I <n; I ++ ){
_ ASM _ ("pause ");
}
}
Else {
Syscall (sys_sched_yield );
N = 1;
}
}
++ G_ucount;
Reset (& tas_lock );
}
}
/*
 * Thread entry point. Runs whichever counting strategy was selected at
 * compile time with -Dcas_imp, -Dtas_imp, -Dcas_imp_all or -Dtas_imp_all.
 * If several are defined, each runs in turn; if none is defined, nothing
 * runs.
 *
 * arg is unused. Always returns NULL.
 */
void *sum(void *arg)
{
    (void)arg;
#ifdef cas_imp
    sum_with_cas_imp(NULL);
#endif
#ifdef tas_imp
    sum_with_tas_imp(NULL);
#endif
#ifdef cas_imp_all
    sum_with_cas_imp_all(NULL);
#endif
#ifdef tas_imp_all
    sum_with_tas_imp_all(NULL);
#endif
    return NULL;
}
Int main ()
{
Pthread_t * thread = (pthread_t *) malloc (10 * sizeof (pthread_t ));
For (INT I = 0; I <10; ++ I)
{
Pthread_create (& Thread [I], null, sum, null );
}
For (INT I = 0; I <10; ++ I)
{
Pthread_join (thread [I], null );
}
Printf ("g_ucount: % d/N", g_ucount );
Free (thread );
}
Here are some good articles I found, for your reference:
(1) http://student.csdn.net/space.php?uid=45153&do=thread&id=7403
(2) http://www.ibm.com/developerworks/cn/linux/l-rwlock_writing/