memcached set命令的大致處理邏輯筆記

來源:互聯網
上載者:User

標籤:blog   使用   資料   os   art   cti   

這次筆記梳理狀態機的主要邏輯,跟蹤 set 命令的執行流程,暫不涉及記憶體申請這一塊;下面的內容基本上都是程式碼註解。

首先補充說明一下:從用戶端連線發送資料,到資料被處理並返回結果的過程中,conn 會經歷的各種狀態如下。

/**
 * States a connection moves through between receiving a request and
 * sending the reply. The enumerators keep their implicit values
 * (conn_listening == 0 ... conn_max_state == 11).
 */
enum conn_states {
    conn_listening,  /**< only the listening socket is ever in this state */
    conn_new_cmd,    /**< waiting for the next command (per the notes, also the
                          initial state of a newly created client connection) */
    conn_waiting,    /**< waiting for data to become readable */
    conn_read,       /**< reading command data from the transport */
    conn_parse_cmd,  /**< trying to parse a command out of the input buffer */
    conn_write,      /**< writing out a simple response */
    conn_nread,      /**< reading a known number of bytes, e.g. the value of
                          a set command */
    conn_swallow,    /**< swallowing unnecessary bytes w/o storing */
    conn_closing,    /**< closing this connection */
    conn_mwrite,     /**< writing out many items sequentially */
    conn_closed,     /**< connection is closed */
    conn_max_state   /**< Max state value (used for assertion) */
};

下面從狀態機開始看。具體來說,是從 libevent 通知有資料可讀開始,這時連線會進入 conn_read 狀態:

static void drive_machine(conn *c) {bool stop = false;int sfd;socklen_t addrlen;struct sockaddr_storage addr;int nreqs = settings.reqs_per_event;int res;const char *str;while (!stop) {switch(c->state) {case conn_listening:... /** 處理串連部分邏輯*/break;case conn_waiting:/** 這種狀態下只能是向libevent註冊可讀事件了*/if (!update_event(c, EV_READ | EV_PERSIST)) {if (settings.verbose > 0)fprintf(stderr, "Couldn‘t update event\n");conn_set_state(c, conn_closing);break;}/**註冊可讀之後就是將下一個狀態設定為讀取資料*/conn_set_state(c, conn_read);stop = true; /** 得跳出狀態機器了,等事件發生再回來,回來入口就是conn_read,現在去吧*/break;/** 通知收到資料了,這裡開始讀取資料*/case conn_read:/**這裡關注tcp傳輸方式,跳到try_read_network那裡去看看,滾動滑鼠到下面找到這個函數的注釋*/res = IS_UDP(c->transport) ? try_read_udp(c) : try_read_network(c);switch (res) {
case READ_NO_DATA_RECEIVED: /** 沒有擷取到資料等下一次可讀事件觸發*/conn_set_state(c, conn_waiting);break;case READ_DATA_RECEIVED: /** 擷取到資料*/conn_set_state(c, conn_parse_cmd); /** 跳轉到命令解析狀態, 跳轉吧*/break;case READ_ERROR: /** 讀取資料異常,關閉串連*/conn_set_state(c, conn_closing);break;case READ_MEMORY_ERROR: /* rbuf擴容失敗 Failed to allocate more memory *//* State already set by try_read_network */break;}/** 注意這裡不會跳出狀態機器,繼續根據上面設定的狀態(其實大可能在上面就跳了)跳轉*/break;/** 上面讀取資料正常就會跳到這裡了*/case conn_parse_cmd :/** 跳到命令解析函數*/if (try_read_command(c) == 0) {/* 嘗試讀取命令失敗了,系統認為需要更多的資料 wee need more data! */conn_set_state(c, conn_waiting); /** 註冊讀事件去*/}/** 同樣,這裡不需要跳出狀態機器*//** 解析命令成功則跳轉到conn_nread準備擷取nbytes的資料*/break;case conn_new_cmd:/* Only process nreqs at a time to avoid starving otherconnections *//** 每次I/O複用返回之後處理的事件不超過nreqs個,這樣可以防止其他客戶串連一直不能被處理到*/--nreqs; /** 每處理一個新的*/if (nreqs >= 0) {/** 計數nreqs*/reset_cmd_handler(c); /* 重新進入while迴圈進入狀態機器,剛開始串連時其實沒有資料所以下一個狀態是conn_waiting, 跳到該狀態邏輯*/} else {pthread_mutex_lock(&c->thread->stats.mutex);c->thread->stats.conn_yields++;pthread_mutex_unlock(&c->thread->stats.mutex);if (c->rbytes > 0) {/* We have already read in data into the input buffer,so libevent will most likely not signal read eventson the socket (unless more data is available. 
As ahack we should just put in a request to write data,because that should be possible ;-)*//***由於已經讀取到了資料,但是又因為每次不會處理那麼多事件,而libevent在下一次事件*擷取中並不會再次擷取到之前已經擷取到但是又未被處理的事件(linux epoll ET),所以這裡使用*一個技巧,即向libevent註冊了可寫事件,這樣當下一次事件通知時memcached優先處理可讀,*將已讀取到的資料進行處理,之後再考慮寫事件*/if (!update_event(c, EV_WRITE | EV_PERSIST)) {if (settings.verbose > 0)fprintf(stderr, "Couldn‘t update event\n");conn_set_state(c, conn_closing);break;}}stop = true; /** 之後nreqs 會被重設為 ettings.reqs_per_event 預設為20*/}break;/** 解析命令成功之後就會跳轉到這裡*/case conn_nread:if (c->rlbytes == 0) {complete_nread(c); /** 不需要再讀取資料了, 直接跳到complete_nread, 之後out_string() 會改變c的狀態可能去到write*/break; /** out_string() 在下面有注釋*/}/* 異常 Check if rbytes < 0, to prevent crash */if (c->rlbytes < 0) {if (settings.verbose) {fprintf(stderr, "Invalid rlbytes to read: len %d\n", c->rlbytes);}conn_set_state(c, conn_closing);break;}/* first check if we have leftovers in the conn_read buffer *//** 經過上面判斷還需要讀取資料,則看看是否還有尚未解析的資料*/if (c->rbytes > 0) {int tocopy = c->rbytes > c->rlbytes ? 
c->rlbytes : c->rbytes; /**確認需要的長度*/if (c->ritem != c->rcurr) {/**將尚未解析的tocopy長度資料複製到c->ritem,* 由於c->item指向了向記憶體申請的item的data部分,因此可以省去一次記憶體複製!?*/memmove(c->ritem, c->rcurr, tocopy);}c->ritem += tocopy;c->rlbytes -= tocopy;c->rcurr += tocopy;c->rbytes -= tocopy;if (c->rlbytes == 0) {break; /** 不需要讀取資料了則跳出switch重新來到conn_nread,在上面complete_nread()之後跳出while*/}}/** 上面讀取的資料不夠,繼續從socket中讀取*//* now try reading from the socket *//** 直接讀取到c->ritem!*/res = read(c->sfd, c->ritem, c->rlbytes);if (res > 0) {pthread_mutex_lock(&c->thread->stats.mutex);c->thread->stats.bytes_read += res;pthread_mutex_unlock(&c->thread->stats.mutex);if (c->rcurr == c->ritem) {c->rcurr += res;}c->ritem += res;c->rlbytes -= res; /** 迴圈讀取直到rlbytes == 0 */break;}if (res == 0) { /* end of stream */conn_set_state(c, conn_closing);break;}if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {if (!update_event(c, EV_READ | EV_PERSIST)) {if (settings.verbose > 0)fprintf(stderr, "Couldn‘t update event\n");conn_set_state(c, conn_closing);break;}stop = true;break;}/* otherwise we have a real error, on which we close the connection */if (settings.verbose > 0) {fprintf(stderr, "Failed to read, and not due to blocking:\n""errno: %d %s \n""rcurr=%lx ritem=%lx rbuf=%lx rlbytes=%d rsize=%d\n",errno, strerror(errno),(long)c->rcurr, (long)c->ritem, (long)c->rbuf,(int)c->rlbytes, (int)c->rsize);}conn_set_state(c, conn_closing);break;case conn_swallow:/* we are reading sbytes and throwing them away */if (c->sbytes == 0) {conn_set_state(c, conn_new_cmd);break;}/* first check if we have leftovers in the conn_read buffer */if (c->rbytes > 0) {int tocopy = c->rbytes > c->sbytes ? c->sbytes : c->rbytes;c->sbytes -= tocopy;c->rcurr += tocopy;c->rbytes -= tocopy;break;}/* now try reading from the socket */res = read(c->sfd, c->rbuf, c->rsize > c->sbytes ? 
c->sbytes : c->rsize);if (res > 0) {pthread_mutex_lock(&c->thread->stats.mutex);c->thread->stats.bytes_read += res;pthread_mutex_unlock(&c->thread->stats.mutex);c->sbytes -= res;break;}if (res == 0) { /* end of stream */conn_set_state(c, conn_closing);break;}if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {if (!update_event(c, EV_READ | EV_PERSIST)) {if (settings.verbose > 0)fprintf(stderr, "Couldn‘t update event\n");conn_set_state(c, conn_closing);break;}stop = true;break;}/* otherwise we have a real error, on which we close the connection */if (settings.verbose > 0)fprintf(stderr, "Failed to read, and not due to blocking\n");conn_set_state(c, conn_closing);break;/**通常調用out_string()處理返回結果都會來到這裡*/case conn_write:/** We want to write out a simple response. If we haven‘t already,* assemble it into a msgbuf list (this will be a single-entry* list for TCP or a two-entry list for UDP).*/if (c->iovused == 0 || (IS_UDP(c->transport) && c->iovused == 1)) {if (add_iov(c, c->wcurr, c->wbytes) != 0) {if (settings.verbose > 0)fprintf(stderr, "Couldn‘t build response\n");conn_set_state(c, conn_closing);break;}}/* fall through... 
*//**沒有break 直接走下面conn_mwrite邏輯*/case conn_mwrite:if (IS_UDP(c->transport) && c->msgcurr == 0 && build_udp_headers(c) != 0) {if (settings.verbose > 0)fprintf(stderr, "Failed to build UDP headers\n");conn_set_state(c, conn_closing);break;}/**下面調用transmit()真正的傳輸資料*/switch (transmit(c)) {case TRANSMIT_COMPLETE:if (c->state == conn_mwrite) {conn_release_items(c);/* XXX: I don‘t know why this wasn‘t the general case */if(c->protocol == binary_prot) {conn_set_state(c, c->write_and_go);} else {conn_set_state(c, conn_new_cmd);}} else if (c->state == conn_write) {if (c->write_and_free) {free(c->write_and_free);c->write_and_free = 0;}conn_set_state(c, c->write_and_go); /** 設定為寫完之後的狀態啦*/} else {if (settings.verbose > 0)fprintf(stderr, "Unexpected state %d\n", c->state);conn_set_state(c, conn_closing);}break;case TRANSMIT_INCOMPLETE:case TRANSMIT_HARD_ERROR:break; /* Continue in state machine. */case TRANSMIT_SOFT_ERROR:stop = true;break;}break;case conn_closing:if (IS_UDP(c->transport))conn_cleanup(c);elseconn_close(c);stop = true;break;case conn_closed:/* This only happens if dormando is an idiot. 
*/abort();break;case conn_max_state:assert(false);break;}}return;}/** read from network as much as we can, handle buffer overflow and connection* close.* before reading, move the remaining incomplete fragment of a command* (if any) to the beginning of the buffer.** To protect us from someone flooding a connection with bogus data causing* the connection to eat up all available memory, break out and start looking* at the data I‘ve got after a number of reallocs...** @return enum try_read_result*//*** 從socket中盡量的讀取資料,處理buffer溢出以及用戶端串連* 在開始讀取資料之前,將剩餘的為解析的命令資料片段移到buffer的前面(尾巴)** 為了防止有些人發送大量的偽造資料消耗可用記憶體,這裡限制了每次重新分配記憶體的次數**(翻譯得不是很好-_-)*/static enum try_read_result try_read_network(conn *c) {enum try_read_result gotdata = READ_NO_DATA_RECEIVED; /** 初始狀態為未讀取到資料*/int res;int num_allocs = 0; /** 記錄重新分配rbuf的次數*/assert(c != NULL);/** 如果有可解析的資料*/if (c->rcurr != c->rbuf) {if (c->rbytes != 0) /* 如果有剩餘未解析的資料則將這些資料接到rbuf的尾巴 otherwise there‘s nothing to copy */memmove(c->rbuf, c->rcurr, c->rbytes);c->rcurr = c->rbuf; /** rcurr指向rbuf,這樣rcurr就有了當前所有的資料*/}/** 下面開始讀資料*/while (1) {if (c->rbytes >= c->rsize) { /** 若尚未解析的資料長度大於rbuf的總的長度,這時候要重新分配rbuf的空間*/if (num_allocs == 4) { /** 最多5次重新分配的機會,5次完了不管有沒有讀完直接返回已經讀取到資料*/return gotdata; /** 按總5次分配,基數為2048即2k,預設最多能達到64k */}++num_allocs;char *new_rbuf = realloc(c->rbuf, c->rsize * 2); /** 將原來空間擴大到2倍*/if (!new_rbuf) { /** 分配失敗了*/STATS_LOCK();stats.malloc_fails++;STATS_UNLOCK();if (settings.verbose > 0) {fprintf(stderr, "Couldn‘t realloc input buffer\n");}c->rbytes = 0; /* ignore what we read */out_of_memory(c, "SERVER_ERROR out of memory reading request");c->write_and_go = conn_closing;return READ_MEMORY_ERROR;}/** 新空間分配成功*/c->rcurr = c->rbuf = new_rbuf;c->rsize *= 2; /** 設定大小*/}int avail = c->rsize - c->rbytes; /**計算可用空間,總的長度-尚未解析的長度*/res = read(c->sfd, c->rbuf + c->rbytes, avail);if (res > 0) {pthread_mutex_lock(&c->thread->stats.mutex);c->thread->stats.bytes_read += res; /** 統計系統資料的讀取量*/pthread_mutex_unlock(&c->thread->stats.mutex);gotdata = 
READ_DATA_RECEIVED;c->rbytes += res; /** 更新尚未解析的長度*/if (res == avail) {/**繼續讀取資料, 這時候需要擴大rbuf的空間*/continue;} else {break; /**讀完了, 返回READ_DATA_RECEIVED*/}}if (res == 0) { /** 讀取出錯*/return READ_ERROR;}if (res == -1) {/** 對端取消連結?*/if (errno == EAGAIN || errno == EWOULDBLOCK) {break;}return READ_ERROR;}}return gotdata;}/** if we have a complete line in the buffer, process it.*//*** 上面的官方注釋說rbuf有完整的行可開始處理已有的這些資料*/static int try_read_command(conn *c) {assert(c != NULL);assert(c->rcurr <= (c->rbuf + c->rsize));assert(c->rbytes > 0);/** 暫時跳過*/if (c->protocol == negotiating_prot || c->transport == udp_transport) {...}/** 暫時跳過*/if (c->protocol == binary_prot) {...} else {/** 直接調轉到 c->protocol == ascii_prot 的情況*/char *el, *cont;if (c->rbytes == 0) /** 沒有可解析資料*/return 0;/** "set bico 0 0 5\r\nhello\r\n" 第一個‘\n‘分離了value*/el = memchr(c->rcurr, ‘\n‘, c->rbytes); /** 嘗試找到\n符號表示命令段結束*/if (!el) { /** 找不到*/if (c->rbytes > 1024) {/** We didn‘t have a ‘\n‘ in the first k. This _has_ to be a* large multiget, if not we should just nuke the connection.*/char *ptr = c->rcurr;while (*ptr == ‘ ‘) { /* ignore leading whitespaces */++ptr;}if (ptr - c->rcurr > 100 ||(strncmp(ptr, "get ", 4) && strncmp(ptr, "gets ", 5))) {conn_set_state(c, conn_closing);return 1;}}return 0;}/** 找到命令結束符*/cont = el + 1; /** cont 指向el之後的內容*/if ((el - c->rcurr) > 1 && *(el - 1) == ‘\r‘) {el--; /**el 指向了 ‘\r‘ 的位置 */}/***將‘\r‘位置重設為‘\0‘ 這樣rcurr暫時能解析到的資料就只有第一個‘\n‘ 之前的資料* 比如set命令 "set bico 0 0 5\r\nhello\r\n" rcurr指向的資料只有"set bico 0 0 5"*/*el = ‘\0‘;assert(cont <= (c->rcurr + c->rbytes));c->last_cmd_time = current_time;/** 跳到命令處理 c->rcurr只包含前段資料了 eg:"set bico 0 0 5" */process_command(c, c->rcurr); /** 跳到process_command() */c->rbytes -= (cont - c->rcurr); /** 更新未解析資料長度*/c->rcurr = cont; /** 上面用cont來分段,這裡將後段剩餘資料放回到rcurr*/assert(c->rcurr <= (c->rbuf + c->rsize));}return 1;}/*** process_command 主要邏輯為判斷命令的合法性並根據命令跳轉到對應的處理函數*/static void process_command(conn *c, char *command) {token_t tokens[MAX_TOKENS]; /***/size_t 
ntokens;int comm;/** for commands set/add/replace, we build an item and read the data* directly into it, then continue in nread_complete().*/c->msgcurr = 0;c->msgused = 0;c->iovused = 0;if (add_msghdr(c) != 0) {out_of_memory(c, "SERVER_ERROR out of memory preparing response");return;}/***tokenize_command 比較簡單,根據空格符將command進行瓜分,逐個放入到tokens數組,最多8個token* eg "set bico 0 0 5" 按順序set是命令 bico是key 0是flag 0是expire 5是value的長度*這樣 toekens = {"set", "bico", "0", "0", "5", NULL};*/ntokens = tokenize_command(command, tokens, MAX_TOKENS);if (ntokens >= 3 &&((strcmp(tokens[COMMAND_TOKEN].value, "get") == 0) ||(strcmp(tokens[COMMAND_TOKEN].value, "bget") == 0))) {/** get命令的處理*/process_get_command(c, tokens, ntokens, false);} else if ((ntokens == 6 || ntokens == 7) &&(strcmp(tokens[COMMAND_TOKEN].value, "set") == 0 && (comm = NREAD_SET)) ||...(strcmp(tokens[COMMAND_TOKEN].value, "append") == 0 && (comm = NREAD_APPEND)) )) {/** set 命令會跳轉到這, 進入process_update_command*/process_update_command(c, tokens, ntokens, comm, false);} else ...}return;}/*** 真正處理命令的地方,涉及到記憶體申請操作*/static void process_update_command(conn *c, token_t *tokens, const size_t ntokens, int comm, bool handle_cas) {char *key;size_t nkey;unsigned int flags;int32_t exptime_int = 0;time_t exptime;int vlen;/** 判斷key的長度是否超過系統規定最大長度 250 bytes*/if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {out_string(c, "CLIENT_ERROR bad command line format");return;}/** 擷取key的值*/key = tokens[KEY_TOKEN].value;nkey = tokens[KEY_TOKEN].length;/** 擷取其他值 flags exptime value_length*/if (! (safe_strtoul(tokens[2].value, (uint32_t *)&flags)&& safe_strtol(tokens[3].value, &exptime_int)&& safe_strtol(tokens[4].value, (int32_t *)&vlen))) {out_string(c, "CLIENT_ERROR bad command line format");return;}/* Ubuntu 8.04 breaks when I pass exptime to safe_strtol */exptime = exptime_int;/* Negative exptimes can underflow and end up immortal. 
realtime() willimmediately expire values that are greater than REALTIME_MAXDELTA, but lessthan process_started, so lets aim for that. */if (exptime < 0)exptime = REALTIME_MAXDELTA + 1;// does cas value exist?if (handle_cas) {if (!safe_strtoull(tokens[5].value, &req_cas_id)) {out_string(c, "CLIENT_ERROR bad command line format");return;}}vlen += 2; /** vlen+2 因為最後有‘\r\n‘ */if (vlen < 0 || vlen - 2 < 0) {out_string(c, "CLIENT_ERROR bad command line format");return;}.../** 分配一個item ,大致邏輯就是去申請一片記憶體空間,這裡跟記憶體管理有關係,後面筆記會記錄解析,這裡暫不深入*/it = item_alloc(key, nkey, flags, realtime(exptime), vlen);if (it == 0) { /** 分配失敗*/if (! item_size_ok(nkey, flags, vlen))out_string(c, "SERVER_ERROR object too large for cache");elseout_of_memory(c, "SERVER_ERROR out of memory storing object");/* swallow the data line */c->write_and_go = conn_swallow;c->sbytes = vlen;/* Avoid stale data persisting in cache because we failed alloc.* Unacceptable for SET. Anywhere else too? */if (comm == NREAD_SET) {
it = item_get(key, nkey); if (it) { item_unlink(it); item_remove(it); }}return;}/** 分配成功*/ITEM_set_cas(it, req_cas_id);c->item = it;c->ritem = ITEM_data(it); /** 這裡將ritem 指向item的data地址,非常有用,可以減少記憶體複製*/c->rlbytes = it->nbytes; /** nbytes表示value的長度,具體可以看看item資料結構,後面筆記會做注釋*/c->cmd = comm; /** 當前正在處理的命令*/conn_set_state(c, conn_nread); /** 設定狀態為讀取it ->nbytes 資料, 調回到process_command*/}/** 返回命令處理的結果到用戶端*/static void out_string(conn *c, const char *str) {size_t len;.../* Nuke a partial output... */c->msgcurr = 0;c->msgused = 0;c->iovused = 0;add_msghdr(c);len = strlen(str);if ((len + 2) > c->wsize) {/* ought to be always enough. just fail for simplicity */str = "SERVER_ERROR output line too long";len = strlen(str);}memcpy(c->wbuf, str, len); /** 複製返回資料到wbuf*/memcpy(c->wbuf + len, "\r\n", 2);c->wbytes = len + 2;c->wcurr = c->wbuf;conn_set_state(c, conn_write); /** 設定寫狀態,狀態機器去到寫邏輯*/c->write_and_go = conn_new_cmd; /**設定寫完之後下一個狀態*/return;}

以上的註解主要依據 set 這條比較簡單的命令來追蹤,這樣比較容易理清整個處理流程。

下一次隨筆主要是關注記憶體管理這一塊。

才疏學淺,理解上可能會有偏差出錯,歡迎指出,謝謝!

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.