標籤:mic 不能 .com cti ptr http drop reads 插入
Windows和POSIX中都提供了自旋鎖,我們也可以通過C++11的atomic來實現自旋鎖。那麼兩者效能上面是什麼關係?先引入實現代碼:
#ifndef __spinlock_h__
#define __spinlock_h__

#include <atomic>

// spinlock_mutex: a minimal lock exposing lock() / try_lock() / unlock(),
// so it can be used with std::lock_guard. One of three implementations is
// selected at compile time:
//   * _WIN32          -> CRITICAL_SECTION initialised with a spin count
//   * _POSIX_C_SOURCE -> pthread_spinlock_t
//   * otherwise       -> std::atomic_flag busy-wait loop
//
// Every variant is non-copyable: the object owns (or is) the lock state, and
// copying a native lock handle would lead to it being destroyed twice.

#ifdef _WIN32
#include <Windows.h>

class spinlock_mutex
{
public:
    // Spin count handed to InitializeCriticalSectionAndSpinCount
    // (all bits set, i.e. the largest DWORD value).
    static constexpr DWORD SPINLOCK_COUNT = static_cast<DWORD>(-1);

public:
    // Initialisation can fail when resources are exhausted; that case is
    // deliberately ignored here. See "Critical Sections and Error Handling"
    // in "Windows via C/C++".
    spinlock_mutex() { InitializeCriticalSectionAndSpinCount(&m_cs, SPINLOCK_COUNT); }
    ~spinlock_mutex() { DeleteCriticalSection(&m_cs); }

    // The CRITICAL_SECTION must not be duplicated or deleted twice.
    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Blocks until the critical section has been entered.
    void lock() { EnterCriticalSection(&m_cs); }

    // Attempts to enter without blocking; returns true on success.
    // The API reports success as "nonzero", so compare against FALSE
    // rather than against the specific value TRUE.
    bool try_lock() { return TryEnterCriticalSection(&m_cs) != FALSE; }

    // Leaves the critical section.
    void unlock() { LeaveCriticalSection(&m_cs); }

private:
    CRITICAL_SECTION m_cs;
};

#elif defined(_POSIX_C_SOURCE)
#include <pthread.h>

class spinlock_mutex
{
public:
    // Errors returned by the pthread calls are not handled here.
    spinlock_mutex() { pthread_spin_init(&m_cs, PTHREAD_PROCESS_PRIVATE); }
    ~spinlock_mutex() { pthread_spin_destroy(&m_cs); }

    // The pthread spin lock must not be duplicated or destroyed twice.
    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Spins until the lock has been acquired.
    void lock() { pthread_spin_lock(&m_cs); }

    // Attempts to acquire without spinning; returns true on success.
    bool try_lock() { return pthread_spin_trylock(&m_cs) == 0; }

    // Releases the lock.
    void unlock() { pthread_spin_unlock(&m_cs); }

private:
    pthread_spinlock_t m_cs;
};

#else

class spinlock_mutex
{
    // Clear means unlocked, set means locked.
    std::atomic_flag flag = ATOMIC_FLAG_INIT;

public:
    spinlock_mutex() = default;

    // Stated explicitly for consistency with the other variants
    // (std::atomic_flag is itself non-copyable).
    spinlock_mutex(const spinlock_mutex&) = delete;
    spinlock_mutex& operator=(const spinlock_mutex&) = delete;

    // Busy-waits until the flag could be set by this thread.
    void lock()
    {
        while (flag.test_and_set(std::memory_order_acquire))
        {
            // spin
        }
    }

    // Clears the flag, releasing the lock.
    void unlock() { flag.clear(std::memory_order_release); }

    // Single acquisition attempt; returns true if the flag was previously clear.
    bool try_lock() { return !flag.test_and_set(std::memory_order_acquire); }
};

#endif

#endif // __spinlock_h__
下面給出一個簡單測試,兩組線程,一組用來插入,另外一組用來取出。測試結果顯示:
(1)無論是Windows,還是POSIX提供的C語言版本的自旋鎖,都和C++11使用atomic構建的自旋鎖效率相近。
(2)在插入線程數和取出線程數相同的情況下,線程數越多,效率越低。
下面是測試代碼:
#include <memory>#include <cassert>#include <iostream>#include <vector>#include <thread>#include <future>#include <random>#include <chrono>#include "spinlock.h"#include <forward_list>struct student_name{ student_name(int age = 0) : age(age), next(nullptr) { } int age; student_name* next;};spinlock_mutex g_mtx;std::forward_list<int> g_students;std::atomic<int> g_inserts; // insert num (successful)std::atomic<int> g_drops; // drop num (successful)std::atomic<int> g_printNum; // as same as g_dropsstd::atomic<long long> g_ageInSum; // age sum when producing student_namestd::atomic<long long> g_ageOutSum; // age sum when consuming student_namestd::atomic<bool> goOn(true);constexpr int INSERT_THREAD_NUM = 1;constexpr int DROP_THREAD_NUM = 1;constexpr int ONE_THREAD_PRODUCE_NUM = 5000000; // when testing, no more than this number, you know 20,000,00 * 100 * 10 ~= MAX_INT if thread num <= 10inline void printOne(student_name* t){ g_printNum.fetch_add(1, std::memory_order_relaxed); g_ageOutSum.fetch_add(t->age, std::memory_order_relaxed); g_drops.fetch_add(1, std::memory_order_relaxed); delete t;}void insert_students(int idNo){ std::default_random_engine dre(time(nullptr)); std::uniform_int_distribution<int> ageDi(1, 99); for (int i = 0; i < ONE_THREAD_PRODUCE_NUM; ++i) { int newAge = ageDi(dre); g_ageInSum.fetch_add(newAge, std::memory_order_relaxed); { std::lock_guard<spinlock_mutex> lock(g_mtx); g_students.push_front(newAge); } // use memory_order_relaxed avoiding affect folly memory order g_inserts.fetch_add(1, std::memory_order_relaxed); }}void drop_students(int idNo){ while (auto go = goOn.load(std::memory_order_consume)) { { std::forward_list<int> tmp; { std::lock_guard<spinlock_mutex> lock(g_mtx); std::swap(g_students, tmp); } auto it = tmp.begin(); while (it != tmp.end()) { g_printNum.fetch_add(1, std::memory_order_relaxed); g_ageOutSum.fetch_add(*it, std::memory_order_relaxed); g_drops.fetch_add(1, std::memory_order_relaxed); ++it; } } }}int main(){ auto start = 
std::chrono::system_clock::now(); std::vector<std::future<void>> insert_threads; std::vector<std::future<void>> drop_threads; for (auto i = 0; i != INSERT_THREAD_NUM; ++i) { insert_threads.push_back(std::async(std::launch::async, insert_students, i)); } for (auto i = 0; i != DROP_THREAD_NUM; ++i) { drop_threads.push_back(std::async(std::launch::async, drop_students, i)); } for (auto& thread : insert_threads) { thread.get(); } std::this_thread::sleep_for(std::chrono::milliseconds(1000)); goOn.store(false, std::memory_order_release); for (auto& thread : drop_threads) { thread.get(); } { std::forward_list<int> tmp; { std::lock_guard<spinlock_mutex> lock(g_mtx); std::swap(g_students, tmp); } auto it = tmp.begin(); while (it != tmp.end()) { g_printNum.fetch_add(1, std::memory_order_relaxed); g_ageOutSum.fetch_add(*it, std::memory_order_relaxed); g_drops.fetch_add(1, std::memory_order_relaxed); ++it; } } auto end = std::chrono::system_clock::now(); std::chrono::duration<double> diff = end - start; std::cout << "Time to insert and drop is: " << diff.count() << " s\n"; std::cout << "insert count1: " << g_inserts.load() << std::endl; std::cout << "drop count1: " << g_drops.load() << std::endl; std::cout << "print num1: " << g_printNum.load() << std::endl; std::cout << "age in1: " << g_ageInSum.load() << std::endl; std::cout << "age out1: " << g_ageOutSum.load() << std::endl; std::cout << std::endl;}
關於自旋鎖,還有以下內容需要說明:
(1)應用程式層使用spinlock的最大問題是不能像kernel那樣關中斷(cli/sti)。假設並發稍微多一點,線程1在lock之後、unlock之前發生了時鐘中斷而被切換出去,
要過一段時間才會被切回來調用unlock,那麼這段時間內另一個調用lock的線程就只能空跑while迴圈,這才是最浪費CPU時間的地方。
所以既然不能關中斷,就只能改用sleep(讓出CPU)了,無論哪種做法都存在巨大的衝突代價。
(2)具體參考:https://www.zhihu.com/question/55764216
Windows和pthread中提供的自旋鎖