這段時間試著實現了一個簡易的C語言垃圾收集器,代碼不多,但對我的經驗而言,確實費了不少心思。
這裡感謝雲風兄的開源:雲風:blog.codingnow.com/2008/06/gc_for_c.html
以及LOGOS兄的對yfgc的解析和實現的一個概念版本:LOGOS:www.cppblog.com/darkdestiny
話說我知道自己的實現裡面,有很多考慮不足的地方。但還是決定貼出來,講述我實現的思想,有不足的地方大家指出來,我也好糾正。
大家知道垃圾收集技術主要分為以下幾種,他們有各自的優點與缺點:
1、引用計數(Reference Counting)
2、標記清除(Mark-Sweep)
3、標記整理/緊縮(Mark-Compact)
4、結點複製(Copying)
5、分代收集(Generational Collecting)
這裡我就不詳述各種思想了,google上有。我的需求是:要求垃圾收集能自然地解決循環引用的問題,佔用的空間不要太多,輕量級的實現,解決記憶體碎片的問題。
這裡我選用的是標記清除演算法,記憶體配置使用記憶體池。
首先還是給出記憶體池的介面,實現在我之前的文章中有講,就不貼出來了。記憶體池的實現借鑒了sgi stl的實現思路。 Code:
/*
 * mem_pool.h - interface of the memory pool used by the collector.
 *
 * The include guard deliberately avoids a leading underscore followed by
 * an uppercase letter: such identifiers are reserved for the
 * implementation.
 */
#ifndef MEM_POOL_H
#define MEM_POOL_H

#include <stddef.h>

/*
 * Allocate a block of n bytes from the pool.
 * NOTE(review): failure behaviour is not visible here; callers should
 * treat a NULL return as possible until the implementation is checked.
 */
void *mem_malloc(size_t n);

/*
 * Return block p to the pool.
 * n is the size of the block; presumably it must equal the size the block
 * was allocated with (the pool has no per-block header in the SGI STL
 * design it borrows from) - confirm against the implementation.
 */
void mem_free(void *p, size_t n);

/*
 * Resize block ptr from old_sz bytes to new_sz bytes and return the
 * (possibly moved) block.
 */
void *mem_realloc(void *ptr, size_t new_sz, size_t old_sz);

#endif /* MEM_POOL_H */
然後看gc的介面: Code:
/*
 * gc.h - public interface of the mark-sweep garbage collector.
 */
#ifndef GARBAGE_COLLECTOR_H
#define GARBAGE_COLLECTOR_H

#include <stddef.h>

/* Raw allocations of the collector are routed through the memory pool. */
#define my_malloc mem_malloc
#define my_free mem_free
#define my_realloc mem_realloc

/* Levels describing how long a node has to stay alive. */
#define NODE_ROOT_LEVEL 0 /* the node is a global variable or lives in main() */
#define NODE_NOW_LEVEL 1  /* the node is only used in the present function */
#define NODE_PRE_LEVEL 2  /* the node is referenced by the calling function */

/*
 * Functions without parameters are declared with (void): an empty
 * parameter list is not a prototype before C23 and would let callers pass
 * arbitrary arguments unchecked.
 */

/* Set up the collector state; call once before any other gc_* function. */
void gc_init(void);

/* Shut the collector down. */
void gc_exit(void);

/*
 * Called at the end of a function (before `return` when the memory is
 * handed back through the return value) for blocks whose lifetime was
 * extended to the calling function.
 * NOTE(review): how the variable argument list is terminated is not
 * visible in this header - confirm against the implementation.
 */
void func_end(void *p, ...);

/* Set the level of the node that manages block p to `level`. */
void gc_link(void *p, int level);

/* gc will mark the memory between gc_enter/gc_leave for later collection */
void gc_enter(void);
void gc_leave(void);

/* Reclaim the nodes that have been marked for collection. */
void gc_collect(void);

/*
 * Allocate sz bytes of collected memory. `finalizer` is the destructor
 * invoked when the memory is freed.
 */
void *gc_malloc(size_t sz, void (*finalizer)(void *));

/* Resize the collected block p to sz bytes. */
void *gc_realloc(void *p, size_t sz);

#endif /* GARBAGE_COLLECTOR_H */
用到的資料結構: Code:
/* Bookkeeping record for one block of memory managed by the collector. */
struct node {
    int mark;  /* mark used by gc collect */
    int level; /* the node level (one of the NODE_*_LEVEL values) */
    struct {
        void *mem; /* pointer to the managed memory */
        int size;  /* the size of the memory */
        void (*finalizer)(void *); /* destructor run when the memory is freed */
    }n;
};

/* File-private state of the collector. */
static struct {
    struct node *pool;   /* an array that stores the nodes */
    int size;            /* the size of the pool */
    int free;            /* the next free node in the pool */
    struct stack *stack; /* the stack used to store pointers in functions */
} E;
這裡 level 取值為:NODE_ROOT_LEVEL、NODE_NOW_LEVEL、NODE_PRE_LEVEL。 基於這樣的考慮:我們知道動態分配一塊記憶體,如果要延長其生命期,要麼通過函數傳回值傳回,要麼通過多級指標,或者直接掛到全域變數上。所以這個gc基於這樣的策略:首先使用者指派的記憶體塊所在的結點 level 值初始化為NODE_NOW_LEVEL,如果使用者需要延長其生命期到上一級函數或全域變數,那麼調用 gc_link 並傳入相應的 level 值。僅在其生命期需要延長至上一級函數時需要在函數結尾處(通過傳回值傳遞動態記憶體時需要在 return 前)調用 func_end。func_end的作用是將該記憶體塊的 level 值設定為NODE_NOW_LEVEL。
知道了結點的生命期,標記就簡單了。gc_leave負責將當前函數棧中的level為NODE_NOW_LEVEL的結點標記為MARK_COLLECT,從而在 gc_collect 中回收。這裡需要說的是main()函數中分配的記憶體和掛到全域變數的記憶體會在gc_exit中釋放。
大致過程知道了,下面就是具體實現了: Code: #include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <assert.h> #include "stack.h" #include "mem_pool.h" #include "gc.h" #define POOL_INITIAL_NUMBER 1024 /* the initial number of nodes in pool */ #define POOL_MAX_NUMBER 4096 /* the max number of nodes in pool */ #define STACK_SECTION_TAG NULL /* the tag to section the stack */ #define MARK_INITIAL -1 /* the node initialed */ #define MARK_RESERVE 0 /* the node marked for reserve */ #define MARK_COLLECT 1 /* the node marked for collect */ struct node { int mark; /* mark for gc collect */ int level; /* the node leavel */ struct { void *mem; /* the pointer point to memory*/ int size; /* the size of memrory */ void (*finalizer)(void *); /* destruction when the mem be free */ }n; }; static struct { struct node *pool; /* an array for store the pointer of node */ int size; /* the size of pool */ int free; /* the next free node in pool */ struct stack *stack; /* the stack used for store pointer in fuction */ } E; static bool pool_compact() { int i, j; struct node temp; for (i = 0; i < E.free; i++) { if (E.pool[i].mark == MARK_INITIAL) { temp = E.pool[i]; for (j = E.free; j > i; j--) { if (E.pool[j].mark != MARK_INITIAL) { E.pool[i] = E.pool[j]; E.pool[j] = temp; break; } } } } for (i = 0; i < E.size; i++) { if (E.pool[i].mark == MARK_INITIAL) { E.free = i; break; } } return E.free >= E.size ? 
true : false; } static void node_init() { int i; for (i = E.free; i < E.size; i++) { E.pool[i].mark = MARK_INITIAL; E.pool[i].level = NODE_NOW_LEVEL; E.pool[i].n.mem = NULL; E.pool[i].n.finalizer = NULL; } } static void pool_expand() { int expand_size; bool expand = false; expand_size = E.size * 2; if (expand_size >= POOL_MAX_NUMBER * sizeof(struct node)) { expand = pool_compact(); } if (expand) { E.pool = (struct node *)my_realloc(E.pool, expand_size * sizeof(struct node), E.size * sizeof(struct node)); E.free = E.size; E.size = expand_size; /* init the node */ node_init(); } } static void node_alloc(void *p, size_t sz, void (*finalizer)(void *)) { if (E.free >= E.size) { pool_expand(); } E.pool[E.free].mark = MARK_RESERVE; E.pool[E.free].level = NODE_NOW_LEVEL; E.pool[E.free].n.mem = p; E.pool[E.free].n.size = sz; // for mem_free E.pool[E.free].n.finalizer = finalizer; E.free++; } static void pool_init() { E.pool = (struct node *)my_malloc(POOL_INITIAL_NUMBER * sizeof(struct node)); E.free = 0; E.size = POOL_INITIAL_NUMBER; /* init the node */ node_init(); } void gc_init() { E.pool = NULL; E.size = 0; E.free = -1; E.stack = init_stack(); pool_init(); } void gc_link(void *p, int level) { int i; for (i = 0; i < E.free; i++) { if (E.pool[i].n.mem == p) { E.pool[i].level = level; break; } } } void gc_enter() { push(E.stack, STACK_SECTION_TAG); } /* accordind to the level of nodes, mark nodes. if in present stack section * of function, there are some nodes' life extend father function's life * which callthe present function, then push these nodes in stack section * of father's function. */ void gc_leave() { void *p; struct stack *stack_temp; stack_temp = init_stack(); while ((p = top(E.stack)) != STACK_SECTION_TAG) { int i; /* whether mark for gc collect or not by searching for the node &