redis源碼分析（2）—

redis源碼分析（2）——事件迴圈

最後更新：2015-01-14 來源：互聯網

上載者：User

創建阿里雲帳戶，並獲得超過 40 款產品的免費試用版；而企業帳戶則可以享有總值 $1200 的免費試用版。立即註冊！

標籤：redis 事件迴圈

redis作為伺服器程式，網路IO處理是關鍵。redis不像memcached使用libevent，它實現了自己的IO事件架構，並且很簡單、小巧。可以選擇select、epoll、kqueue等實現。作為IO事件架構，需要抽象多種IO模型的共性，將整個過程主要抽象為： 1）初始化 2）添加、刪除事件 3）等待事件發生。下面也按照這個步驟分析代碼。（1）初始化：回憶一下，redis的初始化過程中，initServer函數會調用aeCreateEventLoop建立event loop對象，對事件迴圈進行初始化。下面看一下aeEventLoop結構，儲存事件迴圈相關的屬性。

typedef struct aeEventLoop {    int maxfd;   /* highest file descriptor currently registered */    int setsize; /* max number of file descriptors tracked */    long long timeEventNextId;    // <MM>    // 存放的是上次觸發定時器事件的時間    // </MM>    time_t lastTime;     /* Used to detect system clock skew */    aeFileEvent *events; /* Registered events */    aeFiredEvent *fired; /* Fired events */    // <MM>    // 所有定時器事件組織成鏈表    // </MM>    aeTimeEvent *timeEventHead;    // <MM>    // 是否停止eventLoop    // </MM>    int stop;    void *apidata; /* This is used for polling API specific data */    // <MM>    // 事件迴圈每一次迭代都會調用beforesleep    // </MM>    aeBeforeSleepProc *beforesleep;} aeEventLoop;

setsize：指定事件迴圈要監聽的檔案描述符集合的大小。這個值與設定檔中的maxclients有關。 events：存放所有註冊的讀寫事件，是大小為setsize的數組。核心會保證建立串連的fd是當前可用描述符的最小值，所以最多監聽setsize個描述符，那麼最大的fd就是setsize - 1。這種組織方式的好處是，可以以fd為下標，索引到對應的事件，在事件觸發後根據fd快速尋找到對應的事件。 fired：存放觸發的讀寫事件。同樣是setsize大小的數組。 timeEventHead：redis將定時器事件組織成鏈表，這個屬性指向表頭。 apidata：存放epoll、select等實現相關的資料。 beforesleep：事件迴圈在每次迭代前會調用beforesleep執行一些非同步處理。
io模型初始化的抽象函數為aeApiCreate。aeCreateEventLoop函數建立並初始化全域事件迴圈結構，並調用aeApiCreate初始化具體實現依賴的資料結構。

aeEventLoop *aeCreateEventLoop(int setsize) {    aeEventLoop *eventLoop;    int i;    // <MM>    // setsize指定事件迴圈監聽的fd的數目    // 由於核心保證新建立的fd是最小的正整數，所以直接建立setsize大小    // 的數組，存放對應的event    // </MM>    if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;    eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize);    eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize);    if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;    eventLoop->setsize = setsize;    eventLoop->lastTime = time(NULL);    eventLoop->timeEventHead = NULL;    eventLoop->timeEventNextId = 0;    eventLoop->stop = 0;    eventLoop->maxfd = -1;    eventLoop->beforesleep = NULL;    if (aeApiCreate(eventLoop) == -1) goto err;    /* Events with mask == AE_NONE are not set. So let‘s initialize the     * vector with it. */    for (i = 0; i < setsize; i++)        eventLoop->events[i].mask = AE_NONE;    return eventLoop;err:    if (eventLoop) {        zfree(eventLoop->events);        zfree(eventLoop->fired);        zfree(eventLoop);    }    return NULL;}

以epoll為例，aeApiCreate主要是建立epoll的fd，以及要監聽的epoll_event，這些資料定義在：

typedef struct aeApiState {    int epfd;    struct epoll_event *events;} aeApiState;

這裡，state->events用來接收epoll_wait返回的事件，同樣是setsize大小的數組；但它不是以fd為下標，而是由epoll_wait從頭開始依次填入本輪觸發的事件（見後文aeApiPoll）。aeApiCreate會初始化這些屬性，並將aeApiState結構存放到eventLoop->apidata。

/* Set up the epoll backend for `eventLoop`: allocate the backend state and a
 * result buffer of eventLoop->setsize entries, create the epoll descriptor,
 * and publish the state through eventLoop->apidata.
 *
 * Returns 0 on success. Returns -1 on failure, after freeing anything
 * allocated here; eventLoop->apidata is not written in that case. */
static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *api = zmalloc(sizeof(aeApiState));

    if (api == NULL)
        return -1;

    api->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
    if (api->events == NULL)
        goto fail_state;

    api->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
    if (api->epfd == -1)
        goto fail_events;

    eventLoop->apidata = api;
    return 0;

fail_events:
    zfree(api->events);
fail_state:
    zfree(api);
    return -1;
}

（2）添加、刪除事件redis支援兩類事件，網路io事件和定時器事件。定時器事件的添加、刪除相對簡單些，主要是維護定時器事件列表。首先看一下表示定時器事件的結構：

/* Time event structure */
typedef struct aeTimeEvent {
    long long id; /* time event identifier. */
    long when_sec; /* seconds */
    long when_ms; /* milliseconds */
    aeTimeProc *timeProc; /* handler invoked with (eventLoop, id, clientData) once the event is due */
    aeEventFinalizerProc *finalizerProc; /* NOTE(review): per the article, cleanup hook run on deletion; the call site is not shown here */
    void *clientData; /* opaque argument handed to timeProc */
    struct aeTimeEvent *next; /* next node of the timer list */
} aeTimeEvent;

when_sec和when_ms：表示定時器觸發的時間戳，在事件迴圈迭代返回後，如果目前時間戳大於或等於這個值就會回調事件處理函數。 timeProc：事件處理函數。 finalizerProc：清理函數，在刪除定時器時調用。 clientData：需要傳入事件處理函數的參數。 next：定時器事件組織成鏈表，next指向下一個事件。
aeCreateTimeEvent函數用於添加定時器事件，邏輯很簡單，根據目前時間計算下一次觸發的時間，對事件屬性賦值，並插入到定時器鏈表表頭之前。刪除通過aeDeleteTimeEvent函數，根據id找到事件並從鏈表刪除該節點，回調清理函數。具體定時器事件的處理見後文，下面看一下io事件。io事件的添加通過aeCreateFileEvent，邏輯很簡單，根據要註冊的fd，擷取其event，設定屬性，會調用aeApiAddEvent函數添加到底層的io模型。

/* Register `proc` as the handler of `fd` for the event types in `mask`
 * (AE_READABLE and/or AE_WRITABLE), with `clientData` as its argument.
 *
 * Returns AE_OK on success. Returns AE_ERR when fd does not fit in the
 * loop's table (errno is set to ERANGE) or when the polling backend refuses
 * the registration; in both cases the loop's table is left untouched.
 *
 * NOTE(review): only the upper bound of fd is checked here. */
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
        aeFileProc *proc, void *clientData)
{
    aeFileEvent *slot;

    if (fd >= eventLoop->setsize) {
        errno = ERANGE;
        return AE_ERR;
    }

    /* Ask the backend first: it still needs to see the old mask of this fd. */
    if (aeApiAddEvent(eventLoop, fd, mask) == -1) {
        return AE_ERR;
    }

    slot = &eventLoop->events[fd];
    slot->mask |= mask;
    if (mask & AE_READABLE) {
        slot->rfileProc = proc;
    }
    if (mask & AE_WRITABLE) {
        slot->wfileProc = proc;
    }
    slot->clientData = clientData;

    if (eventLoop->maxfd < fd) {
        eventLoop->maxfd = fd;
    }
    return AE_OK;
}

mask：指定註冊的事件類型，可以是讀或寫。 proc：事件處理函數。
下面是io事件的結構，包括註冊的事件類型mask，讀寫事件處理函數，以及對應的參數。

/* File event structure */
typedef struct aeFileEvent {
    int mask; /* one of AE_(READABLE|WRITABLE) */
    aeFileProc *rfileProc; /* handler called when a readable event fires */
    aeFileProc *wfileProc; /* handler called when a writable event fires */
    void *clientData;      /* opaque argument passed to both handlers */
} aeFileEvent;

下面看一下epoll添加事件的實現，主要是調用epoll_ctl。

/* Make epoll watch `fd` for the event types in `mask`, in addition to
 * whatever is already recorded for it in eventLoop->events[fd].mask.
 *
 * Returns 0 on success, -1 if epoll_ctl fails. */
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *api = eventLoop->apidata;
    int registered = eventLoop->events[fd].mask;
    int wanted = mask | registered; /* Merge old events */
    struct epoll_event ev;
    int op;

    /* If the fd was already monitored for some event, we need a MOD
     * operation. Otherwise we need an ADD operation. */
    if (registered == AE_NONE) {
        op = EPOLL_CTL_ADD;
    } else {
        op = EPOLL_CTL_MOD;
    }

    ev.events = 0;
    if (wanted & AE_READABLE) ev.events |= EPOLLIN;
    if (wanted & AE_WRITABLE) ev.events |= EPOLLOUT;
    ev.data.u64 = 0; /* avoid valgrind warning */
    ev.data.fd = fd; /* handed back by epoll_wait to identify the descriptor */

    return (epoll_ctl(api->epfd, op, fd, &ev) == -1) ? -1 : 0;
}

struct epoll_event用於指定要監聽的事件，以及該檔案描述符綁定的data，在事件觸發時可以返回。這裡將data直接存為fd，通過這個資料，便可以找到對應的事件，然後調用其處理函數。 epoll的刪除與添加類似，不再贅述。（3）等待事件觸發：通過調用aeMain函數進入事件迴圈：

/* Run the event loop until eventLoop->stop becomes non-zero.
 *
 * The stop flag is cleared on entry. Each iteration first calls the
 * beforesleep hook, if one is installed, and then processes all event
 * types via aeProcessEvents. */
void aeMain(aeEventLoop *eventLoop) {
    eventLoop->stop = 0;
    for (;;) {
        if (eventLoop->stop)
            break;
        if (eventLoop->beforesleep)
            eventLoop->beforesleep(eventLoop);
        aeProcessEvents(eventLoop, AE_ALL_EVENTS);
    }
}

函數內部就是一個while迴圈，不斷的調用aeProcessEvents函數，等待事件發生。在每次迭代前會調用beforesleep函數，處理非同步任務，後續會和serverCron一起介紹。 aeProcessEvents函數首先會處理定時器事件，然後是io事件，下面介紹這個函數的實現。首先，聲明變數記錄處理的事件個數，以及觸發的事件。flags表示此輪需要處理的事件類型，如果不需要處理定時器事件和io事件直接返回。

    /* processed counts the events handled in this call; numevents is
     * presumably the number of fired file events reported by the poll
     * (it bounds the dispatch loop further below). */
    int processed = 0, numevents;

    /* Nothing to do? return ASAP */
    if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;

redis中的定時器事件是藉助epoll_wait的逾時實現的。大體思路是，在每次事件迭代調用epoll_wait時需要指定此輪sleep的時間。如果沒有io事件發生，則在sleep時間到了之後會返回。算出下一次最先發生的定時器事件到目前時間的間隔，用這個值設為sleep的時間，這樣就可以保證在事件到期後回調其處理函數。但是，由於每次返回後，還要處理io事件，所以定時器的觸發時間是不精確的，一定是比預定的觸發時間晚的。下面看下具體實現。首先是尋找下一次最先發生的定時器事件，以確定sleep的時間。如果沒有定時器事件，則根據傳入的flags，選擇是一直阻塞直到io事件發生，或者是不阻塞，檢查完立即返回。通過調用aeSearchNearestTimer函數尋找最先發生的事件，採用的是線性尋找的方式，複雜度是O(n)，可以將定時器事件組織成堆，加快尋找。不過，redis中只有一個serverCron定時器事件，所以暫時不需最佳化。

    /* Note that we want call select() even if there are no     * file events to process as long as we want to process time     * events, in order to sleep until the next time event is ready     * to fire. */    // <MM>    // 在兩種情況下進入poll，阻塞等待事件發生：    // 1）在有需要監聽的描述符時（maxfd != -1）    // 2）需要處理定時器事件，並且DONT_WAIT開關關閉的情況下    // </MM>    if (eventLoop->maxfd != -1 ||        ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {        int j;        aeTimeEvent *shortest = NULL;        struct timeval tv, *tvp;        // <MM>        // 根據最快發生的定時器事件的發生時間，確定此次poll阻塞的時間        // </MM>        if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))            // <MM>            // 線性尋找最快發生的定時器事件            // </MM>            shortest = aeSearchNearestTimer(eventLoop);        if (shortest) {            // <MM>            // 如果有定時器事件，則根據它觸發的時間，計算sleep的時間（ms單位）            // </MM>            long now_sec, now_ms;            /* Calculate the time missing for the nearest             * timer to fire. */            aeGetTime(&now_sec, &now_ms);            tvp = &tv;            tvp->tv_sec = shortest->when_sec - now_sec;            if (shortest->when_ms < now_ms) {                tvp->tv_usec = ((shortest->when_ms+1000) - now_ms)*1000;                tvp->tv_sec --;            } else {                tvp->tv_usec = (shortest->when_ms - now_ms)*1000;            }            if (tvp->tv_sec < 0) tvp->tv_sec = 0;            if (tvp->tv_usec < 0) tvp->tv_usec = 0;        } else {            // <MM>            // 如果沒有定時器事件，則根據情況是立即返回，或者永遠阻塞            // </MM>            /* If we have to check for events but need to return             * ASAP because of AE_DONT_WAIT we need to set the timeout             * to zero */            if (flags & AE_DONT_WAIT) {                tv.tv_sec = tv.tv_usec = 0;                tvp = &tv;            } else {                /* Otherwise we can block */                tvp = NULL; /* wait forever */            }        }

接著，調用aeApiPoll函數，傳入前面計算的sleep時間，等待io事件發生。在函數返回後，觸發的事件已經填充到eventLoop的fired數組中。epoll的實現如下，就是調用epoll_wait，函數返回後，會將觸發的事件存放到state->events數組中的前numevents個元素。接下來，填充fired數組，設定每個觸發事件的fd，以及事件類型。

/* Wait for file events with epoll_wait and copy what fired into
 * eventLoop->fired.
 *
 * tvp is the maximum time to wait; it is converted to whole milliseconds.
 * A NULL tvp means wait without a timeout (-1 is passed to epoll_wait).
 *
 * Returns the number of entries written to eventLoop->fired, which is 0
 * when epoll_wait reports no events or fails. */
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    int retval, numevents = 0;

    // <MM>
    // Call epoll_wait; state->events receives the events that occurred,
    // each carrying the fd it belongs to.
    // </MM>
    retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,
            tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
    if (retval > 0) {
        int j;

        numevents = retval;
        // <MM>
        // Events occurred: store them in the fired array.
        // </MM>
        for (j = 0; j < numevents; j++) {
            int mask = 0;
            struct epoll_event *e = state->events+j;

            if (e->events & EPOLLIN) mask |= AE_READABLE;
            /* Error and hang-up conditions are reported as writable. */
            if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
            if (e->events & EPOLLERR) mask |= AE_WRITABLE;
            if (e->events & EPOLLHUP) mask |= AE_WRITABLE;
            eventLoop->fired[j].fd = e->data.fd;
            eventLoop->fired[j].mask = mask;
        }
    }
    return numevents;
}

在事件返回後，需要處理事件。遍歷fired數組，取得fd對應的事件，並根據觸發的事件類型，回調其處理函數。

        /* Dispatch every fired file event to the handler(s) registered for
         * its descriptor. */
        for (j = 0; j < numevents; j++) {
            // <MM>
            // After the poll returns, all fired events are in the fired array.
            // </MM>
            aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
            int mask = eventLoop->fired[j].mask;
            int fd = eventLoop->fired[j].fd;
            int rfired = 0;

            /* note the fe->mask & mask & ... code: maybe an already processed
             * event removed an element that fired and we still didn't
             * processed, so we check if the event is still valid. */
            // <MM>
            // Invoke the handlers registered for the fd whose event fired.
            // </MM>
            if (fe->mask & mask & AE_READABLE) {
                rfired = 1;
                fe->rfileProc(eventLoop,fd,fe->clientData,mask);
            }
            if (fe->mask & mask & AE_WRITABLE) {
                /* Do not call the same handler twice when it is registered
                 * for both reading and writing and already ran above. */
                if (!rfired || fe->wfileProc != fe->rfileProc)
                    fe->wfileProc(eventLoop,fd,fe->clientData,mask);
            }
            processed++;
        }

以上便是io事件的處理，下面看一下定時器事件的處理。會調用processTimeEvents函數處理定時器事件。首先會檢查是否發生系統時鐘偏差（system clock skew，例如系統時間被調回過去時，目前時間會小於上次記錄的時間），如果發生就將所有事件的發生時間置為0，使其立即觸發。

    /* If the system clock is moved to the future, and then set back to the
     * right value, time events may be delayed in a random way. Often this
     * means that scheduled operations will not be performed soon enough.
     *
     * Here we try to detect system clock skews, and force all the time
     * events to be processed ASAP when this happens: the idea is that
     * processing events earlier is less dangerous than delaying them
     * indefinitely, and practice suggests it is. */
    if (now < eventLoop->lastTime) {
        /* The clock went backwards since the previous pass. */
        te = eventLoop->timeEventHead;
        while(te) {
            te->when_sec = 0; /* a zero timestamp is always in the past, so the event is due */
            te = te->next;
        }
    }
    /* Remember this pass so the next one can detect a backwards jump. */
    eventLoop->lastTime = now;

接下來遍歷所有定時器事件，尋找觸發的事件，然後回調處理函數。定時器事件處理函數的傳回值，決定這個事件是一次性的，還是周期性的。如果返回AE_NOMORE，則是一次性事件，在調用完後會刪除該事件。否則的話，傳回值指定的是距離下一次觸發的毫秒數。

    te = eventLoop->timeEventHead;
    /* Highest id that existed before this pass; anything above it was
     * registered by a handler during the pass. */
    maxId = eventLoop->timeEventNextId-1;
    while(te) {
        long now_sec, now_ms;
        long long id;

        /* Skip events created during this pass. */
        if (te->id > maxId) {
            te = te->next;
            continue;
        }
        aeGetTime(&now_sec, &now_ms);
        if (now_sec > te->when_sec ||
            (now_sec == te->when_sec && now_ms >= te->when_ms))
        {
            // <MM>
            // The timer's firing time has passed: call the registered
            // handler.
            // </MM>
            int retval;

            id = te->id;
            retval = te->timeProc(eventLoop, id, te->clientData);
            processed++;
            /* After an event is processed our time event list may
             * no longer be the same, so we restart from head.
             * Still we make sure to don't process events registered
             * by event handlers itself in order to don't loop forever.
             * To do so we saved the max ID we want to handle.
             *
             * FUTURE OPTIMIZATIONS:
             * Note that this is NOT great algorithmically. Redis uses
             * a single time event so it's not a problem but the right
             * way to do this is to add the new elements on head, and
             * to flag deleted elements in a special way for later
             * deletion (putting references to the nodes to delete into
             * another linked list). */
            // <MM>
            // The handler's return value decides whether the timer is
            // deleted. If retval is not AE_NOMORE, the timer is rescheduled
            // to fire at now + retval (ms).
            // </MM>
            if (retval != AE_NOMORE) {
                aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
            } else {
                // <MM>
                // AE_NOMORE was returned: delete this timer.
                // </MM>
                aeDeleteTimeEvent(eventLoop, id);
            }
            te = eventLoop->timeEventHead;
        } else {
            te = te->next;
        }
    }

在回調處理函數時，有可能會添加新的定時器事件，如果不斷加入，存在死迴圈的風險，所以需要避免這種情況，每次迴圈不處理新添加的事件，這是通過下面的代碼實現的。

        /* An id above maxId means the event is newer than the ids captured
         * before the loop started; skip it in this pass. */
        if (te->id > maxId) {
            te = te->next;
            continue;
        }

事件迴圈部分分析到此結束，感覺比較直觀、清晰，完全可以抽出來，作為一個獨立的庫使用。下面一節，會介紹請求的處理。

redis源碼分析（2）——事件迴圈

本文章原先以中文撰寫並發佈於 aliyun.com，亦設英文版本，僅作資訊用途。本網站不對文章的準確性，完整性或可靠性或其任何翻譯作出任何明示或暗示的陳述或保證。如對該文章有任何疑慮或投訴，請傳送電郵至 info-contact@alibabacloud.com 並提供相關疑慮或投訴的詳細說明。職員會於 5 個工作天內與您聯絡，一經驗證之後，即會刪除該侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More