原文網址:http://www.yuanma.org/data/2009/0326/article_3585.htm
200行C代碼實現簡單線程池 原以為實現一個線程池一定是個規模和難度都是超大的項目。這陣子有些閑空,就複習了一下pthreads的知識。光理論複習不夠的,還得上手寫點東西才行。這就想著實現一個線程池試試看吧。沒想到200行不到就出來了一個可用的東西。
基本想法是這樣的:
1、預建立的線程通過mutex休眠在線程池中。這樣,通過unlock該mutex就可以喚醒該線程了;
2、出於簡單性的目標,一個線程池內的所有線程的屬性都是相同的。這個屬性在建立線程池時可以指定;
3、一般來講,線程池內的線程不能被取消、或者調用pthread_exit()退出。這些管理性工作是由線程池本身完成的。即,在使用線程池借出的線程時,函數返回應該只用return。
4、從線程池“借出”的線程,可以歸還給線程池。實際上也必須歸還給線程池,這樣線程池可以完成最後的清理工作。
5、如果實在需要取消一個線程,那麼好吧,只是別忘了告訴線程池你取消了它的手下。
#include "threadpool.h" /* #include了所有必要的系統標頭檔 */
#include <sched.h>
#define THWK_F_CLEAN 1 /* Set by thread_pool_clean() while the pool is being torn down; a worker that wakes up and sees it exits. */
#define THWK_F_RUNNING 2 /* Set by a worker from wake-up until it has re-locked its own mutex; activation and give-back wait for it to clear. */
/* The job handed to a pooled thread: a callback plus its single argument. */
struct thread_worker_arg {
void (*action)(void*); /* function supplied by the pool user */
void *what; /* argument passed to action */
};
/* Bookkeeping for one pooled thread. */
struct thread_worker {
pthread_t id; /* thread id filled in by pthread_create() */
struct thread_worker_arg arg; /* job that sleepy_wrapper() runs after each wake-up */
pthread_mutex_t lock; /* used to park the idle thread; it does not protect any critical section */
struct thread_worker *next; /* link to the next worker in the pool's list */
unsigned long long delay; /* currently unused; intended for measuring scheduling latency */
unsigned long flags; /* THWK_F_* bits */
};
/* A pool of pre-created threads; workers that are not lent out sit on a singly linked list. */
struct thread_pool {
pthread_mutex_t lock; /* serializes access to the pool structure itself */
struct thread_worker *first; /* head of the list of workers currently in the pool */
int total; /* total number of threads owned by the pool */
int current_nr; /* number of idle threads currently in the pool */
};
/*
 * Placeholder timestamp source, reserved for measuring scheduling latency.
 *
 * Always returns 0 for now; a real implementation could read a cycle
 * counter (for example rdtsc on x86).
 *
 * Deliberately not declared plain `inline`: in C99/C11 an `inline`
 * definition without `static` or `extern` does not emit an external
 * definition, so calls that are not inlined fail at link time.
 */
unsigned long long get_ticks(void)
{
    return 0ULL;
}
/*
 * Cancellation cleanup handler installed by sleepy_wrapper().
 *
 * voidp is the struct thread_worker of the thread being cancelled. The
 * handler releases the worker's mutex, destroys it and frees the worker
 * so that a cancelled thread does not leak its bookkeeping.
 *
 * NOTE(review): this assumes the cancelled thread holds worker->lock when
 * the handler runs (i.e. cancellation is acted on inside the user action)
 * -- confirm if asynchronous cancellation is ever enabled.
 */
static void sleepy_wrapper_cleanup(void *voidp)
{
    struct thread_worker *worker = voidp;

    pthread_mutex_unlock(&worker->lock);
    /* Release the mutex's resources before its storage goes away. */
    pthread_mutex_destroy(&worker->lock);
    free(worker);
}
/*
 * Thread entry point for every pooled worker.
 *
 * voidp is the struct thread_worker created for this thread. Each loop
 * iteration blocks on worker->lock until another thread releases it
 * (thread_pool_activate() or thread_pool_clean()), runs the configured
 * action if there is one, and then re-locks worker->lock so that the next
 * iteration blocks again. The loop ends once THWK_F_CLEAN is set in
 * worker->flags.
 *
 * NOTE(review): the thread goes to sleep by locking a mutex it already
 * holds and is woken by a different thread unlocking it. The mutexes are
 * created with default attributes; POSIX only defines this behaviour for
 * specific mutex types -- confirm it is acceptable on the target platform.
 * NOTE(review): worker->flags is read and written by several threads with
 * no synchronization.
 */
static void* sleepy_wrapper(void *voidp)
{
struct thread_worker *worker = voidp;
while (1) {
pthread_cleanup_push(sleepy_wrapper_cleanup, worker); /* register a cleanup handler so a cancelled thread does not leak its worker */
pthread_mutex_lock(&worker->lock); /* an idle thread sleeps here: the mutex was locked at pool creation or at the end of the previous iteration */
worker->delay = get_ticks() - worker->delay; /* no practical use yet */
if (THWK_F_CLEAN & worker->flags) /* the pool is being cleaned up, so this thread is done */
goto done; /* goto instead of break: pthread_cleanup_push/pop may be macros that must stay paired in the same block */
worker->flags |= THWK_F_RUNNING; /* mark busy; thread_pool_activate() and thread_pool_giveback() wait on this bit */
if (worker->arg.action) /* do the actual work */
worker->arg.action(worker->arg.what);
done:
pthread_mutex_unlock(&worker->lock); /* release the mutex so this thread can be used again */
pthread_cleanup_pop(0); /* 0: remove the handler without running it */
if (THWK_F_CLEAN & worker->flags) /* pool cleanup: leave the loop */
break;
pthread_mutex_lock(&worker->lock); /* re-lock so the lock at the top of the loop blocks; this call is expected to succeed */
worker->flags &= ~THWK_F_RUNNING; /* cleared only after re-locking, so an activation arriving between the unlock and the lock above keeps waiting instead of causing a deadlock */
}
pthread_exit(0);
}
/* Return the underlying pthread id stored in a pooled worker. */
pthread_t thread_pool_rawid(struct thread_worker *worker)
{
    pthread_t raw_id = worker->id;

    return raw_id;
}
/*
 * Tell the pool that one of its lent-out threads has been cancelled.
 *
 * pool:   pool that created the thread; NULL is ignored.
 * worker: the cancelled worker; currently unused.
 *
 * Only the pool's total thread count is adjusted. The count is updated
 * under pool->lock because thread_pool_clean() reads it under that lock.
 */
void thread_pool_forget(struct thread_pool *pool, struct thread_worker *worker)
{
    (void)worker;

    if (!pool)
        return;

    pthread_mutex_lock(&pool->lock);
    pool->total--;
    pthread_mutex_unlock(&pool->lock);
}
/*
 * Wake a lent-out worker so that it runs its configured action once.
 *
 * worker: worker obtained from thread_pool_lend(); NULL is ignored.
 *
 * The function first waits until the worker has cleared THWK_F_RUNNING,
 * i.e. until it has re-locked its own mutex after the previous run;
 * unlocking earlier would be lost and the worker would never wake up.
 * It then unlocks worker->lock, which lets the pthread_mutex_lock() at the
 * top of sleepy_wrapper()'s loop return.
 *
 * The wait loop calls sched_yield() (declared in <sched.h>) so that it
 * does not monopolize a CPU and so that the compiler cannot treat it as a
 * side-effect-free loop.
 *
 * NOTE(review): THWK_F_RUNNING is only set once the worker has actually
 * woken up, so two back-to-back activations without waiting for the first
 * action to start are not protected by this check.
 */
void thread_pool_activate(struct thread_worker *worker)
{
    if (!worker)
        return;

    worker->delay = get_ticks();
    while (thread_pool_is_running(worker))
        sched_yield();
    pthread_mutex_unlock(&worker->lock);
}
/*
 * Report whether a worker currently has THWK_F_RUNNING set.
 *
 * Returns the masked flag bit itself (non-zero when running, 0 otherwise),
 * not a normalized 0/1 value.
 */
int thread_pool_is_running(struct thread_worker *worker)
{
    unsigned long running_bit = worker->flags & THWK_F_RUNNING;

    return running_bit;
}
/*
 * Borrow an idle worker from the pool and configure the job it will run.
 *
 * pool:   pool to borrow from.
 * action: function the worker runs when activated; must not be NULL.
 * what:   argument passed to action.
 * worker: out-parameter receiving the borrowed worker, or NULL when the
 *         pool has no idle worker.
 *
 * Returns 0 on success, -EINVAL if pool, action or worker is NULL, and
 * -EBUSY if the pool is empty. The worker is detached from the head of the
 * pool's list; it does not start running until thread_pool_activate().
 */
int thread_pool_lend(struct thread_pool *pool, void (*action)(void*), void* what, struct thread_worker **worker)
{
    struct thread_worker *idle;
    int rc = 0;

    if (!action || !pool || !worker)
        return -EINVAL;

    pthread_mutex_lock(&pool->lock);
    idle = pool->first;
    if (idle) {
        /* Test the list head, not the out-parameter: an empty pool must
         * not be dereferenced. */
        idle->arg.action = action;
        idle->arg.what = what;
        pool->first = idle->next;
        idle->next = NULL;
        pool->current_nr--;
    } else {
        rc = -EBUSY;
    }
    *worker = idle;
    pthread_mutex_unlock(&pool->lock);

    return rc;
}
/*
 * Return a borrowed worker to the pool.
 *
 * pool:   pool the worker was borrowed from.
 * worker: worker to return.
 *
 * Returns 0 on success or -EINVAL if either argument is NULL. The call
 * waits until the worker has cleared THWK_F_RUNNING, then clears its job
 * and pushes it onto the head of the pool's list.
 *
 * The wait loop calls sched_yield() (declared in <sched.h>) so that it
 * does not monopolize a CPU and so that the compiler cannot treat it as a
 * side-effect-free loop.
 */
int thread_pool_giveback(struct thread_pool *pool, struct thread_worker *worker)
{
    if (!pool || !worker)
        return -EINVAL;

    while (thread_pool_is_running(worker))
        sched_yield();

    pthread_mutex_lock(&pool->lock);
    worker->next = pool->first;
    pool->first = worker;
    worker->arg.action = NULL;
    worker->arg.what = NULL;
    pool->current_nr++;
    pthread_mutex_unlock(&pool->lock);

    return 0;
}
/*
 * Create a pool of up to nr_to_create sleeping threads.
 *
 * nr_to_create: number of threads requested.
 * attr:         attributes passed to pthread_create() for every thread
 *               (may be NULL).
 *
 * Each worker's mutex is locked before its thread is started, so the new
 * thread blocks in sleepy_wrapper() until it is activated. Creation stops
 * at the first allocation or pthread_create() failure; the pool then holds
 * however many threads were created so far.
 *
 * Returns the new pool, or NULL when nr_to_create is 0, the pool cannot be
 * allocated, or not a single thread could be created.
 */
struct thread_pool* thread_pool_create(int nr_to_create, pthread_attr_t *attr)
{
    struct thread_pool *pool;
    int created = 0;

    if (nr_to_create == 0)
        return NULL;

    pool = malloc(sizeof *pool);
    if (pool == NULL)
        return NULL;
    pool->first = NULL;
    pool->total = 0;
    pthread_mutex_init(&pool->lock, NULL);

    while (created < nr_to_create) {
        struct thread_worker *fresh = calloc(1, sizeof *fresh);

        if (fresh == NULL)
            break;

        /* Lock first so the new thread parks itself on this mutex. */
        pthread_mutex_init(&fresh->lock, NULL);
        pthread_mutex_lock(&fresh->lock);
        if (pthread_create(&fresh->id, attr, sleepy_wrapper, fresh) != 0) {
            pthread_mutex_unlock(&fresh->lock);
            pthread_mutex_destroy(&fresh->lock);
            free(fresh);
            break;
        }

        /* Push onto the head of the pool's list. */
        fresh->next = pool->first;
        pool->first = fresh;
        ++created;
    }

    pool->total = created;
    pool->current_nr = created;
    if (created == 0) {
        /* Nothing could be started: do not hand out an empty pool. */
        pthread_mutex_destroy(&pool->lock);
        free(pool);
        return NULL;
    }
    return pool;
}
/*
 * Shut down every thread in the pool and free the pool.
 *
 * pool: pool to destroy.
 *
 * Returns 0 on success, -EINVAL if pool is NULL, and -EBUSY if some
 * workers are still lent out (total != current_nr); in the error cases
 * nothing is freed. On success the pool pointer is no longer valid.
 */
int thread_pool_clean(struct thread_pool *pool)
{
    struct thread_worker *worker;

    if (!pool)
        return -EINVAL;

    pthread_mutex_lock(&pool->lock);
    if (pool->total != pool->current_nr) {
        pthread_mutex_unlock(&pool->lock);
        return -EBUSY;
    }

    while ((worker = pool->first) != NULL) {
        /* Plain assignment on purpose: every other flag is dropped. */
        worker->flags = THWK_F_CLEAN;
        /* Wake the sleeping thread; it sees THWK_F_CLEAN and exits. */
        pthread_mutex_unlock(&worker->lock);
        pthread_join(worker->id, NULL);
        pool->first = worker->next;
        pthread_mutex_destroy(&worker->lock);
        free(worker);
    }

    pthread_mutex_unlock(&pool->lock);
    pthread_mutex_destroy(&pool->lock);
    free(pool);
    return 0;
}
/* 這是一個使用例子。 */
/* 在我的P4雙核機器上,可以比單線程版本快20%,但複雜性遠高於20%! */
#include "threadpool.h"
/*
 * Add up all integers in the inclusive range [start, end] by iteration.
 *
 * Returns 0 when start > end. The result wraps modulo 2^64 on overflow,
 * as usual for unsigned arithmetic.
 */
unsigned long long sum(unsigned long long start, unsigned long long end)
{
    unsigned long long total = 0;
    unsigned long long cur;

    if (start > end)
        return 0;

    /*
     * Stop one short of end and add end separately: with a `cur <= end`
     * condition the loop could never terminate when end is the largest
     * representable value, because cur would wrap around to 0.
     */
    for (cur = start; cur < end; ++cur)
        total += cur;

    return total + end;
}
/* Work item and result slot for one thread of the summing example. */
struct per_sum {
unsigned long long sum, start, end; /* sum: result; start/end: inclusive range to add up. threaded_sum() resets end to 0 when done, and main() waits for that. */
pthread_mutex_t lock; /* held around the completion signal and the wait in main() */
pthread_cond_t cond; /* signalled by threaded_sum() when the range has been summed */
};
/*
 * Pool action for the example: sum the range described by a struct per_sum.
 *
 * voidp: pointer to the struct per_sum to process; NULL is ignored after
 *        the start message has been printed.
 *
 * The result and the completion marker (end == 0) are published while
 * holding per_sum->lock, because the waiter tests `end` under that same
 * lock; writing them outside the lock would be a data race.
 */
void threaded_sum(void *voidp)
{
    struct per_sum *per_sum = voidp;
    unsigned long long partial;

    printf("thread %p start\n", voidp);
    if (!per_sum)
        return;

    /* Do the expensive part without holding the lock. */
    partial = sum(per_sum->start, per_sum->end);

    pthread_mutex_lock(&per_sum->lock);
    per_sum->sum = partial;
    per_sum->start = per_sum->end = 0;
    printf("thread %p exit, end=%lld\n", voidp, per_sum->end);
    pthread_cond_signal(&per_sum->cond);
    pthread_mutex_unlock(&per_sum->lock);
}
/*
 * Example driver: borrow NR_THREADS workers from a pool and use them for
 * NR_ROUNDS rounds, each worker summing a consecutive range of `delta + 1`
 * integers per round, then print the grand total.
 *
 * Exit codes: -1 pool creation failed, 3/4 mutex/cond init failed,
 * 5 lend failed, 6 give-back failed, 7 pool cleanup failed.
 */
int main(void)
{
#define NR_THREADS 2
    enum { NR_ROUNDS = 2 };
    struct thread_worker* workers[NR_THREADS];
    struct per_sum per_sums[NR_THREADS];
    struct thread_pool *pool;
    int i, round;
    unsigned long long start = 0;
    unsigned long long result = 0;
    unsigned long long delta = 0x10ffffff;

    pool = thread_pool_create(NR_THREADS, NULL);
    if (!pool)
        exit(-1);

    /* Set up one result slot per thread and borrow the workers. */
    for (i = 0; i < NR_THREADS; ++i) {
        if (pthread_mutex_init(&per_sums[i].lock, NULL)) {
            printf("failed init mutex\n");
            exit(3);
        }
        if (pthread_cond_init(&per_sums[i].cond, NULL)) {
            printf("failed init cond\n");
            exit(4);
        }
        if (thread_pool_lend(pool, threaded_sum, (void*)&per_sums[i], &workers[i])) {
            printf("failed to lend thread %d\n", i);
            exit(5);
        }
    }

    for (round = 0; round < NR_ROUNDS; ++round) {
        /* Hand each worker the next consecutive range and wake it. */
        for (i = 0; i < NR_THREADS; i++) {
            per_sums[i].start = start;
            per_sums[i].end = per_sums[i].start + delta;
            start = per_sums[i].end + 1;
            thread_pool_activate(workers[i]);
        }

        /* A worker marks completion by resetting `end` to 0. */
        for (i = 0; i < NR_THREADS; i++) {
            pthread_mutex_lock(&per_sums[i].lock);
            while (per_sums[i].end != 0)
                pthread_cond_wait(&per_sums[i].cond, &per_sums[i].lock);
            result += per_sums[i].sum;
            pthread_mutex_unlock(&per_sums[i].lock);
        }
    }

    for (i = 0; i < NR_THREADS; ++i) {
        if (thread_pool_giveback(pool, workers[i])) {
            printf("failed to giveback thread %d\n", i);
            exit(6);
        }
        pthread_mutex_destroy(&per_sums[i].lock);
        pthread_cond_destroy(&per_sums[i].cond);
    }

    if (thread_pool_clean(pool)) {
        printf("failed to clean thread pool\n");
        exit(7);
    }
    printf("sum = %lld\n\n", result);
    return 0;
}
PS: 是在Linux寫的這個程式。完善的話,比如可以根據系統負載調整線程池中線程的數量;增加更完整的效能測量功能,調試功能;提供更方便的線程屬性設定介面;在Linux平台上,可以使用clone()提供更為靈活的資源策略,等等。