#include <stdint.h>
#include <time.h>
#include <pthread.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>

namespace detail {

/*
 * Back-off helper for spin loops.
 *
 * The first kMaxActiveSpin calls to wait() execute a single x86 `pause`
 * instruction; every call after that sleeps for 500000 ns (0.5 ms) via
 * nanosleep() so a long wait stops burning CPU.
 */
class Sleeper {
  static const uint32_t kMaxActiveSpin = 4000;

  uint32_t spinCount;

 public:
  Sleeper() : spinCount(0) {}

  void wait() {
    if (spinCount < kMaxActiveSpin) {
      ++spinCount;
      asm volatile("pause");
    } else {
      struct timespec ts = { 0, 500000 };
      nanosleep(&ts, NULL);
    }
  }
};

}  // namespace detail

/*
 * A spin lock whose whole state is one byte.
 *
 * The struct has no constructor, so init() must be called before first use
 * unless the object is zero-initialized (FREE == 0).  x86 only: the
 * implementation relies on `lock cmpxchgb` and on x86 store ordering.
 */
struct MicroSpinLock {
  enum { FREE = 0, LOCKED = 1 };
  uint8_t lock_;

  /*
   * Atomically replace lock_ with newVal if it currently equals compare.
   * Returns true when the swap happened.
   *
   * cmpxchgb implicitly uses %al: it reads `compare` from it and, when the
   * comparison fails, overwrites it with the current memory value.  The
   * register is therefore declared read-write ("+a") so the compiler does
   * not assume it still holds `compare` afterwards.  "q" keeps both the
   * setz destination and newVal in byte-addressable registers.
   */
  bool cas(uint8_t compare, uint8_t newVal) {
    bool out;
    asm volatile("lock; cmpxchgb %2, (%3);"
                 "setz %0;"
                 : "=q" (out), "+a" (compare)
                 : "q" (newVal), "r" (&lock_)
                 : "memory", "flags");
    return out;
  }

  /* Alternative kept for the experiment: same lock with a 64-bit word.
  uint64_t lock_;
  bool cas(uint64_t compare, uint64_t newVal) {
    bool out;
    asm volatile("lock; cmpxchgq %2, (%3);"
                 "setz %0;"
                 : "=q" (out), "+a" (compare)
                 : "q" (newVal), "r" (&lock_)
                 : "memory", "flags");
    return out;
  }
  */

  /* Put the lock into the unlocked state. */
  void init() {
    lock_ = FREE;
  }

  /* Single acquisition attempt; returns true if the lock was taken. */
  bool try_lock() {
    return cas(FREE, LOCKED);
  }

  /*
   * Acquire the lock, blocking until it is available.  Waits with plain
   * reads until the lock looks free and only then attempts the locked
   * compare-and-swap.
   */
  void lock() {
    detail::Sleeper sleeper;
    do {
      while (lock_ != FREE) {
        // Compiler barrier: forces lock_ to be re-read on every iteration.
        asm volatile("" : : : "memory");
        sleeper.wait();
      }
    } while (!try_lock());
  }

  /* Release the lock. */
  void unlock() {
    // Compiler barrier so earlier accesses are not moved past the store.
    asm volatile("" : : : "memory");
    lock_ = FREE;  // release barrier on x86
  }
};

/* Number of worker threads started by main(). */
static const int kThreadCount = 10;
/* Number of increments each worker performs. */
static const int kIterations = 250000;

/* Shared counter and its lock, each aligned to 64 bytes. */
size_t __attribute__((aligned(64))) g_uCount = 0;
MicroSpinLock __attribute__((aligned(64))) locker;

/*
 * Thread entry point: increments g_uCount kIterations times.
 *
 *   NO_LOCK    - increments without any synchronization.
 *   SMALL_LOCK - increments while holding `locker`.
 *
 * With neither macro defined the function does nothing.  The argument is
 * unused and the return value is always NULL.
 */
void* sum(void*) {
#ifdef NO_LOCK
  for (int i = 0; i < kIterations; ++i) {
    g_uCount++;
  }
#endif
#ifdef SMALL_LOCK
  for (int i = 0; i < kIterations; ++i) {
    locker.lock();
    g_uCount++;
    locker.unlock();
  }
#endif
  return NULL;
}

/*
 * Starts kThreadCount threads running sum() and waits for all of them.
 * Returns 0 on success, 1 if allocation or thread creation failed.
 */
int main(void) {
  locker.init();

  pthread_t* thread =
      static_cast<pthread_t*>(malloc(kThreadCount * sizeof(pthread_t)));
  if (thread == NULL) {
    fprintf(stderr, "malloc failed\n");
    return 1;
  }

  // Count the threads that actually started so only those are joined.
  int started = 0;
  while (started < kThreadCount) {
    if (pthread_create(&thread[started], NULL, sum, NULL) != 0) {
      fprintf(stderr, "pthread_create failed for thread %d\n", started);
      break;
    }
    ++started;
  }

  for (int i = 0; i < started; ++i) {
    pthread_join(thread[i], NULL);
  }
  free(thread);

  // printf("g_uCount:%zu\n", g_uCount);
  return started == kThreadCount ? 0 : 1;
}
以上是實驗代碼,結果比較令人滿意,這裡不再細說。一個 lock 只需 8 個 bit,相當不錯;與 4 位元組的 lock 對比,沒有發現明顯問題。本部落格留給自己和感興趣的朋友們。