硬碟寫滿後redis的處理機制,硬碟redis機制

來源:互聯網
上載者:User

硬碟寫滿後redis的處理機制,硬碟redis機制

前些天一台redis機器硬碟寫滿了,主要是由於程式bug導致備份量激增,而恰好監控程式的通知機制也罷工了,於是第一次體驗到了redis的罷工(唯讀不寫)。
現在我們來看下在磁碟寫滿後redis的處理機制:


save流程:serverCron->rdbSaveBackground->rdbSave
save後流程:serverCron->backgroundSaveDoneHandler
上述流程產生的結果就是server.lastbgsave_status = REDIS_ERR,

受其影響,processCommand和luaRedisGenericCommand中判斷如果是寫操作,則拒絕執行:processCommand向用戶端回覆錯誤訊息後直接返回REDIS_OK,luaRedisGenericCommand則向指令碼拋出錯誤,兩者都沒有實際寫入


1.rdbSave所有的寫出錯都會返回REDIS_ERR

int rdbSave(char *filename) {    dictIterator *di = NULL;    dictEntry *de;    char tmpfile[256];    char magic[10];    int j;    long long now = mstime();    FILE *fp;    rio rdb;    uint64_t cksum;    snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());    fp = fopen(tmpfile,"w");    if (!fp) {        redisLog(REDIS_WARNING, "Failed opening .rdb for saving: %s",            strerror(errno));        return REDIS_ERR;    }    rioInitWithFile(&rdb,fp);    if (server.rdb_checksum)        rdb.update_cksum = rioGenericUpdateChecksum;    snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION);    if (rdbWriteRaw(&rdb,magic,9) == -1) goto werr;    for (j = 0; j < server.dbnum; j++) {        redisDb *db = server.db+j;        dict *d = db->dict;        if (dictSize(d) == 0) continue;        di = dictGetSafeIterator(d);        if (!di) {            fclose(fp);            return REDIS_ERR;        }        /* Write the SELECT DB opcode */        if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;        if (rdbSaveLen(&rdb,j) == -1) goto werr;        /* Iterate this DB writing every entry */        while((de = dictNext(di)) != NULL) {            sds keystr = dictGetKey(de);            robj key, *o = dictGetVal(de);            long long expire;            initStaticStringObject(key,keystr);            expire = getExpire(db,&key);            if (rdbSaveKeyValuePair(&rdb,&key,o,expire,now) == -1) goto werr;        }        dictReleaseIterator(di);    }    di = NULL; /* So that we don't release it again on error. */    /* EOF opcode */    if (rdbSaveType(&rdb,REDIS_RDB_OPCODE_EOF) == -1) goto werr;    /* CRC64 checksum. It will be zero if checksum computation is disabled, the     * loading code skips the check in this case. 
*/    cksum = rdb.cksum;    memrev64ifbe(&cksum);    if (rioWrite(&rdb,&cksum,8) == 0) goto werr;    /* Make sure data will not remain on the OS's output buffers */    if (fflush(fp) == EOF) goto werr;    if (fsync(fileno(fp)) == -1) goto werr;    if (fclose(fp) == EOF) goto werr;    /* Use RENAME to make sure the DB file is changed atomically only     * if the generate DB file is ok. */    if (rename(tmpfile,filename) == -1) {        redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));        unlink(tmpfile);        return REDIS_ERR;    }    redisLog(REDIS_NOTICE,"DB saved on disk");    server.dirty = 0;    server.lastsave = time(NULL);    server.lastbgsave_status = REDIS_OK;    return REDIS_OK;werr:    fclose(fp);    unlink(tmpfile);    redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));    if (di) dictReleaseIterator(di);    return REDIS_ERR;}


2.rdbSaveBackground中,如果子進程調用rdbSave返回REDIS_ERR,那麼子進程exit(1)

int rdbSaveBackground(char *filename) {    pid_t childpid;    long long start;    if (server.rdb_child_pid != -1) return REDIS_ERR;    server.dirty_before_bgsave = server.dirty;    server.lastbgsave_try = time(NULL);    start = ustime();    if ((childpid = fork()) == 0) {        int retval;        /* Child */        closeListeningSockets(0);        redisSetProcTitle("redis-rdb-bgsave");        retval = rdbSave(filename);        if (retval == REDIS_OK) {            size_t private_dirty = zmalloc_get_private_dirty();            if (private_dirty) {                redisLog(REDIS_NOTICE,                    "RDB: %zu MB of memory used by copy-on-write",                    private_dirty/(1024*1024));            }        }        exitFromChild((retval == REDIS_OK) ? 0 : 1);       //進程退出時返回0/1    } else {        /* Parent */        server.stat_fork_time = ustime()-start;        if (childpid == -1) {            server.lastbgsave_status = REDIS_ERR;            redisLog(REDIS_WARNING,"Can't save in background: fork: %s",                strerror(errno));            return REDIS_ERR;        }        redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);        server.rdb_save_time_start = time(NULL);        server.rdb_child_pid = childpid;        updateDictResizePolicy();        return REDIS_OK;    }    return REDIS_OK; /* unreached */}

3.bgsave完成後,serverCron中得到bgsave子進程的返回碼進行後續處理

    /* Check if a background saving or AOF rewrite in progress terminated. */    if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {        int statloc;        pid_t pid;        if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {            int exitcode = WEXITSTATUS(statloc);            int bysignal = 0;            if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);            if (pid == server.rdb_child_pid) {                backgroundSaveDoneHandler(exitcode,bysignal);    //根據bgsave子進程的exitcode以及是否由訊號結束的標籤進行後續處理            } else if (pid == server.aof_child_pid) {                backgroundRewriteDoneHandler(exitcode,bysignal);            } else {                redisLog(REDIS_WARNING,                    "Warning, detected child with unmatched pid: %ld",                    (long)pid);            }            updateDictResizePolicy();        }    }

4.如果子進程非訊號結束,並且exitcode非0,那麼設定bgsave狀態為REDIS_ERR

void backgroundSaveDoneHandler(int exitcode, int bysignal) {    if (!bysignal && exitcode == 0) {        redisLog(REDIS_NOTICE,            "Background saving terminated with success");        server.dirty = server.dirty - server.dirty_before_bgsave;        server.lastsave = time(NULL);        server.lastbgsave_status = REDIS_OK;    } else if (!bysignal && exitcode != 0) {        redisLog(REDIS_WARNING, "Background saving error");        server.lastbgsave_status = REDIS_ERR;      //狀態轉換    } else {        mstime_t latency;        redisLog(REDIS_WARNING,            "Background saving terminated by signal %d", bysignal);        latencyStartMonitor(latency);        rdbRemoveTempFile(server.rdb_child_pid);        latencyEndMonitor(latency);        latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);        /* SIGUSR1 is whitelisted, so we have a way to kill a child without         * tirggering an error conditon. */        if (bysignal != SIGUSR1)            server.lastbgsave_status = REDIS_ERR;    }    server.rdb_child_pid = -1;    server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;    server.rdb_save_time_start = -1;    /* Possibly there are slaves waiting for a BGSAVE in order to be served     * (the first stage of SYNC is a bulk transfer of dump.rdb) */    updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? REDIS_OK : REDIS_ERR);}

5.processCommand中判定cmd是寫操作的話,向用戶端回覆錯誤訊息(bgsaveerr或MISCONF),然後直接返回REDIS_OK,不執行該命令
    /* Don't accept write commands if there are problems persisting on disk     * and if this is a master instance. */    if (((server.stop_writes_on_bgsave_err &&          server.saveparamslen > 0 &&          server.lastbgsave_status == REDIS_ERR) ||          server.aof_last_write_status == REDIS_ERR) &&        server.masterhost == NULL &&        (c->cmd->flags & REDIS_CMD_WRITE ||         c->cmd->proc == pingCommand))    {        flagTransaction(c);        if (server.aof_last_write_status == REDIS_OK)            addReply(c, shared.bgsaveerr);        else            addReplySds(c,                sdscatprintf(sdsempty(),                "-MISCONF Errors writing to the AOF file: %s\r\n",                strerror(server.aof_last_write_errno)));        return REDIS_OK;    }

6.luaRedisGenericCommand中判定cmd是寫操作的話,向Lua指令碼拋出錯誤並跳到cleanup,不執行該命令

    /* Write commands are forbidden against read-only slaves, or if a     * command marked as non-deterministic was already called in the context     * of this script. */    if (cmd->flags & REDIS_CMD_WRITE) {        if (server.lua_random_dirty) {            luaPushError(lua,                "Write commands not allowed after non deterministic commands");            goto cleanup;        } else if (server.masterhost && server.repl_slave_ro &&                   !server.loading &&                   !(server.lua_caller->flags & REDIS_MASTER))        {            luaPushError(lua, shared.roslaveerr->ptr);            goto cleanup;        } else if (server.stop_writes_on_bgsave_err &&                   server.saveparamslen > 0 &&                   server.lastbgsave_status == REDIS_ERR)        {            luaPushError(lua, shared.bgsaveerr->ptr);            goto cleanup;        }    }cleanup:    /* Clean up. Command code may have changed argv/argc so we use the     * argv/argc of the client instead of the local variables. */    for (j = 0; j < c->argc; j++) {        robj *o = c->argv[j];        /* Try to cache the object in the cached_objects array.         * The object must be small, SDS-encoded, and with refcount = 1         * (we must be the only owner) for us to cache it. */        if (j < LUA_CMD_OBJCACHE_SIZE &&            o->refcount == 1 &&            o->encoding == REDIS_ENCODING_RAW &&            sdslen(o->ptr) <= LUA_CMD_OBJCACHE_MAX_LEN)        {            struct sdshdr *sh = (void*)(((char*)(o->ptr))-(sizeof(struct sdshdr)));            if (cached_objects[j]) decrRefCount(cached_objects[j]);            cached_objects[j] = o;            cached_objects_len[j] = sh->free + sh->len;        } else {            decrRefCount(o);        }    }    if (c->argv != argv) {        zfree(c->argv);        argv = NULL;    }    if (raise_error) {        /* If we are here we should have an error in the stack, in the         * form of a table with an "err" field. 
Extract the string to         * return the plain error. */        lua_pushstring(lua,"err");        lua_gettable(lua,-2);        return lua_error(lua);    }    return 1;  



怎麼配置redis在記憶體寫滿的情況下,將新進來的資料寫到磁碟中

IO:輸入輸出。
從記憶體讀取資料叫輸出,將資料寫入記憶體叫輸入。
大資料IO就是指在磁碟與記憶體之間傳輸大量資料的意思咯。只不過因為資料太大記憶體容納不下需要進行多次部分寫入。
資料在磁碟上是無法完成尋找的,要麼被調入記憶體,要麼有磁碟資料的索引(索引調入記憶體)。
電腦所有操作都是在記憶體中進行的,磁碟是外設。
 
怎麼設定硬碟為唯讀狀態,怎麼禁用硬碟自身的壞扇區處理機制?

1.修復邏輯壞道
首先從最簡單的方法入手。藉助Windows下的磁碟掃描工具,在資源管理員中選中盤符後單擊滑鼠右鍵,在彈出的磁碟機屬性視窗中依次選擇“工具→開始檢查”,將掃描類型設定為完全掃描,並選擇自動修復錯誤,然後點擊開始,掃描時間會因磁碟容量及掃描選項的不同而有所差異。
如果邏輯壞道存在於系統區導致無法正常啟動,我們可以使用Windows 98/Me的啟動盤,在DOS提示符下鍵入:Scandisk 盤符,按Enter鍵,一旦發現壞道,程式會提示你是否要Fix it(修復),選擇Yes開始修復,許多因系統區出現邏輯壞道無法正常啟動Windows的問題一般都可以用此方法解決。

2.用Scandisk檢查物理壞道
對於物理壞道Scandisk就無能為力了,它只能將其標記為壞道,以後不再對這塊地區進行讀寫操作。物理壞道具有“傳染性”,會向周邊擴散,導致儲存於壞道附近的資料也處於危險境地。
用Scandisk時在查到壞道時停止,注意觀察Scandisk停止時的數值,如22%,假設硬碟總容量為2GB,2GB×22%=0.44GB,硬碟出現壞道的起始位置大致為440MB處,由於硬碟壞道易向周邊擴散,所以必須留足夠的緩衝區,將硬碟第一個分區容量設定為400MB,其餘1.6GB按200MB為單位分為8個區,使用Scandisk檢查所有分區,將無法通過Scandisk檢測的分區刪除或隱藏,以確保系統不再讀寫這些地區。其餘相鄰的分區可合并後使用。分區、隱藏、刪除、合并等操作可使用圖形化介面的PartitionMagic或DiskMan等工具軟體進行。
 

相關文章

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.