mongos is the automatic sharding (routing) component that MongoDB provides. In a sharded MongoDB deployment almost every request goes through mongos, which forwards it to the mongod instances, merges the results, and returns them to the client. This article analyzes the initialization of mongos, as preparation for later articles on queries, deletes, updates and inserts through mongos, on mapReduce and aggregate, and on mongos's automatic sharding and balancing. The entry point is main in mongo\s\server.cpp; skipping the command-line handling, we start directly from the real startup function, runMongosServer.
static bool runMongosServer( bool doUpgrade ) {
    // mongos manages connections with pools; register the hook that is run on
    // newly created connections. With ShardingConnectionHook(false) the hook
    // only does authentication (user name and password) when a new connection
    // is established.
    pool.addHook( new ShardingConnectionHook( false ) );
    pool.setName( "mongos connectionpool" );

    // With the parameter set to true the hook also looks at the connection
    // type: for MASTER (single server) and SET (replica set) connections it
    // first sends the setShardVersion command, passing this mongos's own
    // serverID (an automatically generated OID). mongod later uses this
    // serverID to send mis-routed data back to this mongos so that it can be
    // forwarded to the correct mongod; a later article covers this in detail.
    shardConnectionPool.addHook( new ShardingConnectionHook( true ) );
    shardConnectionPool.setName( "mongos shardconnection connectionpool" );

    // Mongos shouldn't lazily kill cursors, otherwise we can end up with extras from migration
    DBClientConnection::setLazyKillCursor( false );

    ReplicaSetMonitor::setConfigChangeHook( boost::bind( &ConfigServer::replicaSetChange , &configServer , _1 ) );

    if ( ! configServer.init( configdbs ) ) {   // initialize configServer: record the config server addresses
        return false;
    }
    if ( ! configServer.ok( true ) ) {          // connect to the config servers and make sure they are reachable
        return false;
    }

    {
        class CheckConfigServers : public task::Task {
            virtual string name() const { return "CheckConfigServers"; }
            virtual void doWork() { configServer.ok(true); }
        };
        task::repeat(new CheckConfigServers, 60*1000);   // re-check config server reachability every 60 seconds
    }

    // Upgrade the config metadata version; only an upgrade from version 2 to
    // version 3 is supported. Roughly, the old data is backed up, read back,
    // and rewritten into the collections in the version-3 layout; we will not
    // go deeper into this here.
    int configError = configServer.checkConfigVersion( doUpgrade );
    if ( configError ) {
        return false;
    }

    configServer.reloadSettings();   // read the settings stored on the config servers and make sure the needed indexes exist

    init();

#if !defined(_WIN32)
    CmdLine::launchOk();
#endif

    if ( !noHttpInterface )
        boost::thread web( boost::bind(&webServerThread, new NoAdminAccess() /* takes ownership */) );

    MessageServer::Options opts;
    opts.port = cmdLine.port;
    opts.ipList = cmdLine.bind_ip;
    start(opts);   // start serving

    // listen() will return when exit code closes its socket.
    dbexit( EXIT_NET_ERROR );
    return true;
}
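To make the role of these pool hooks concrete, below is a minimal sketch of a connection-pool hook, modeled loosely on the onCreate/onHandedOut/onDestroy callbacks that appear throughout this article. It is not the real ShardingConnectionHook or the mongos hook interface; the FakeConnection type, the LoggingHook class, and the host name are made up for the illustration.

#include <iostream>
#include <string>

// Stand-in for a client connection; only the host name matters here.
struct FakeConnection {
    std::string host;
};

// The hook pattern used by the pools above: the pool calls back at three
// points in a connection's life.
class ConnectionHook {
public:
    virtual ~ConnectionHook() {}
    virtual void onCreate( FakeConnection* c )    = 0;  // right after a new connection is established
    virtual void onHandedOut( FakeConnection* c ) = 0;  // every time the pool hands a connection to a caller
    virtual void onDestroy( FakeConnection* c )   = 0;  // just before the pool closes a connection
};

// A trivial hook that only logs; ShardingConnectionHook instead authenticates
// in onCreate and, for shard connections, sends setShardVersion.
class LoggingHook : public ConnectionHook {
public:
    virtual void onCreate( FakeConnection* c )    { std::cout << "created connection to "    << c->host << "\n"; }
    virtual void onHandedOut( FakeConnection* c ) { std::cout << "handed out connection to " << c->host << "\n"; }
    virtual void onDestroy( FakeConnection* c )   { std::cout << "destroying connection to " << c->host << "\n"; }
};

int main() {
    FakeConnection c;
    c.host = "shard1.example.com:27018";   // hypothetical host
    LoggingHook hook;
    hook.onCreate( &c );
    hook.onHandedOut( &c );
    hook.onDestroy( &c );
    return 0;
}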
runMongosServer->ConfigServer::init
bool ConfigServer::init( vector<string> configHosts ) {
    string hn = getHostName();

    set<string> hosts;
    for ( size_t i=0; i<configHosts.size(); i++ ) {
        string host = configHosts[i];
        hosts.insert( getHost( host , false ) );    // server address without the port
        configHosts[i] = getHost( host , true );    // server address with the port
    }

    for ( set<string>::iterator i=hosts.begin(); i!=hosts.end(); i++ ) {
        string host = *i;
        bool ok = false;
        for ( int x=10; x>0; x-- ) {                // resolve the server via getnameinfo, retrying up to 10 times
            if ( ! hostbyname( host.c_str() ).empty() ) {
                ok = true;
                break;
            }
            sleepsecs( 10 );
        }
        if ( ! ok )
            return false;
    }

    _config = configHosts;                          // remember the config server addresses

    string fullString;
    joinStringDelim( configHosts, &fullString, ',' );   // join all server addresses with commas
    // 10gen recommends running more than one config server in production. The
    // config servers work in SYNC mode: every request is sent to all of the
    // servers configured here at the same time.
    _primary.setAddress( ConnectionString( fullString , ConnectionString::SYNC ) );
    return true;
}
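As a quick illustration of what joinStringDelim produces here, the sketch below joins a list of config server addresses with commas; the host names are hypothetical. The resulting string is what gets wrapped in ConnectionString( fullString, ConnectionString::SYNC ).

#include <iostream>
#include <string>
#include <vector>

// Minimal equivalent of joinStringDelim( hosts, &out, ',' ), for illustration only.
static std::string joinHosts( const std::vector<std::string>& hosts ) {
    std::string out;
    for ( size_t i = 0; i < hosts.size(); i++ ) {
        if ( i > 0 )
            out += ',';
        out += hosts[i];
    }
    return out;
}

int main() {
    std::vector<std::string> configHosts;
    configHosts.push_back( "cfg1.example.com:27019" );   // hypothetical config servers
    configHosts.push_back( "cfg2.example.com:27019" );
    configHosts.push_back( "cfg3.example.com:27019" );

    // Prints cfg1.example.com:27019,cfg2.example.com:27019,cfg3.example.com:27019,
    // which is the SYNC connection string used for the config servers.
    std::cout << joinHosts( configHosts ) << "\n";
    return 0;
}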
runMongosServer->ConfigServer::ok
bool ConfigServer::ok( bool checkConsistency ) {
    if ( ! _primary.ok() )              // the address was already set in init()
        return false;

    if ( checkConsistency ) {
        string errmsg;
        if ( ! checkConfigServersConsistent( errmsg ) ) {   // check that every config server is reachable and that their data agree
            return false;
        }
    }
    return true;
}
runMongosServer->ConfigServer::ok->checkConfigServersConsistent
bool ConfigServer::checkConfigServersConsistent( string& errmsg , int tries ) const {
    unsigned firstGood = 0;
    int up = 0;
    vector<BSONObj> res;
    for ( unsigned i=0; i<_config.size(); i++ ) {
        BSONObj x;
        try {
            // take a connection from the pool, with a 30-second timeout
            scoped_ptr<ScopedDbConnection> conn(
                    ScopedDbConnection::getInternalScopedDbConnection( _config[i], 30.0 ) );

            // check auth
            conn->get()->update("config.foo.bar", BSONObj(), BSON("x" << 1));   // check that we are authorized to write
            conn->get()->simpleCommand( "admin", &x, "getlasterror");
            if (x["err"].type() == String && x["err"].String() == "unauthorized") {
                errmsg = "not authorized, did you start with --keyFile?";
                return false;
            }

            if ( ! conn->get()->simpleCommand( "config" , &x , "dbhash" ) )   // run dbhash against the config database to get its collection hashes
                x = BSONObj();
            else {
                x = x.getOwned();
                if ( up == 0 )
                    firstGood = i;
                up++;
            }
            conn->done();
        }
        // ( the catch block handling connection errors is elided in this excerpt )
        res.push_back(x);
    }

    if ( _config.size() == 1 )
        return true;

    if ( up == 0 ) {
        errmsg = "no config servers reachable";
        return false;
    }

    if ( up == 1 )
        return true;

    // Compare the first reachable config server with every later one: the
    // hashes of collections.chunks and collections.databases must match,
    // otherwise the config servers are in an inconsistent state.
    BSONObj base = res[firstGood];
    for ( unsigned i=firstGood+1; i<res.size(); i++ ) {
        if ( res[i].isEmpty() )
            continue;

        string c1 = base.getFieldDotted( "collections.chunks" );
        string c2 = res[i].getFieldDotted( "collections.chunks" );
        string d1 = base.getFieldDotted( "collections.databases" );
        string d2 = res[i].getFieldDotted( "collections.databases" );

        if ( c1 == c2 && d1 == d2 )
            continue;

        stringstream ss;
        ss << "config servers " << _config[firstGood] << " and " << _config[i] << " differ";
        log( LL_WARNING ) << ss.str();
        if ( tries <= 1 ) {
            errmsg = ss.str();
            return false;
        }

        return checkConfigServersConsistent( errmsg , tries - 1 );
    }

    return true;
}
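The consistency rule itself is simple: every reachable config server must report the same dbhash values for collections.chunks and collections.databases. The sketch below restates that rule with a std::map standing in for the BSON result of the dbhash command; it is only an illustration, not mongos code, and the hash strings in main are hypothetical.

#include <map>
#include <string>
#include <vector>

typedef std::map<std::string, std::string> DbHashResult;   // stand-in for the dbhash BSON result

static std::string field( const DbHashResult& r, const std::string& name ) {
    DbHashResult::const_iterator it = r.find( name );
    return it == r.end() ? std::string() : it->second;
}

// Returns true when all reachable config servers (non-empty results) report
// the same hashes for the chunks and databases collections.
static bool configServersConsistent( const std::vector<DbHashResult>& results ) {
    const DbHashResult* base = 0;
    for ( size_t i = 0; i < results.size(); i++ ) {
        if ( results[i].empty() )
            continue;                       // unreachable server: skipped here, not an error
        if ( base == 0 ) {
            base = &results[i];             // the first reachable server is the baseline
            continue;
        }
        if ( field( results[i], "collections.chunks" )    != field( *base, "collections.chunks" ) ||
             field( results[i], "collections.databases" ) != field( *base, "collections.databases" ) )
            return false;                   // the config servers differ
    }
    return true;
}

int main() {
    DbHashResult a, b;
    a["collections.chunks"]    = b["collections.chunks"]    = "5bd2a658";   // hypothetical hash values
    a["collections.databases"] = b["collections.databases"] = "91f4ab12";
    std::vector<DbHashResult> results;
    results.push_back( a );
    results.push_back( DbHashResult() );    // one unreachable config server
    results.push_back( b );
    return configServersConsistent( results ) ? 0 : 1;   // consistent -> exit code 0
}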
Back in runMongosServer, the next step to look at is reloadSettings.
void ConfigServer::reloadSettings() {
    set<string> got;
    scoped_ptr<ScopedDbConnection> conn(
            ScopedDbConnection::getInternalScopedDbConnection( _primary.getConnString(), 30.0 ) );
    try {
        // read the documents stored in config.settings
        auto_ptr<DBClientCursor> c = conn->get()->query( ShardNS::settings , BSONObj() );
        while ( c->more() ) {
            BSONObj o = c->next();
            string name = o["_id"].valuestrsafe();
            got.insert( name );
            if ( name == "chunksize" ) {            // read the chunksize value (default 64MB)
                int csize = o["value"].numberInt();

                // validate chunksize before proceeding
                if ( csize == 0 ) {
                    // setting was not modified; mark as such
                    got.erase(name);
                }
                else {
                    Chunk::MaxChunkSize = csize * 1024 * 1024;
                }
            }
        }

        if ( ! got.count( "chunksize" ) ) {         // no chunksize document yet: insert the default of 64MB
            conn->get()->insert( ShardNS::settings, BSON( "_id" << "chunksize" <<
                                                          "value" << (Chunk::MaxChunkSize / ( 1024 * 1024 ) ) ) );
        }

        // indexes that later code relies on
        conn->get()->ensureIndex( ShardNS::chunk, BSON( "ns" << 1 << "min" << 1 ), true );
        conn->get()->ensureIndex( ShardNS::chunk, BSON( "ns" << 1 << "shard" << 1 << "min" << 1 ), true );
        conn->get()->ensureIndex( ShardNS::chunk, BSON( "ns" << 1 << "lastmod" << 1 ), true );
        conn->get()->ensureIndex( ShardNS::shard, BSON( "host" << 1 ), true );

        conn->done();
    }
    // ( the catch block logging failures is elided in this excerpt )
}
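The chunksize document stores the value in megabytes (for example { _id: "chunksize", value: 64 }), and mongos converts it to bytes for Chunk::MaxChunkSize. The sketch below shows just that arithmetic, with a value of 0 treated as "not modified" so the built-in 64MB default is kept; the variable names are made up for the example.

#include <iostream>

int main() {
    const long long defaultChunkSizeBytes = 64 * 1024 * 1024;   // built-in default: 64MB

    int csizeMB = 64;   // the "value" field read from the chunksize document in config.settings

    long long maxChunkSizeBytes = defaultChunkSizeBytes;
    if ( csizeMB != 0 )                                 // 0 means the setting was never modified
        maxChunkSizeBytes = static_cast<long long>( csizeMB ) * 1024 * 1024;

    std::cout << maxChunkSizeBytes << "\n";             // prints 67108864 for 64MB
    return 0;
}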
Continuing with runMongosServer->init.
void init() {
    serverID.init();               // generate this mongos's serverID (an OID)
    setupSIGTRAPforGDB();
    setupCoreSignals();
    setupSignals( false );
    Logstream::get().addGlobalTee( new RamLog("global") );
}
Finally we reach the start function: runMongosServer->start.
void start( const MessageServer::Options& opts ) {
    // The balancer thread: it is responsible for keeping the data balanced
    // across the shards, moving chunks between servers according to the
    // balancing policy.
    balancer.go();
    // cursorCache caches cursors; this starts a thread that removes cursors
    // that have received no request for 10 minutes.
    cursorCache.startTimeoutThread();
    // the thread that runs registered PeriodicTasks
    PeriodicTask::theRunner->go();

    // ShardedMessageHandler is the actual handler behind the mongos sockets.
    // The createServer( options, handler ) structure was already covered in
    // the mongod analysis, so it is not repeated here; we will go straight to
    // the message-handling part of ShardedMessageHandler.
    ShardedMessageHandler handler;
    MessageServer * server = createServer( opts , &handler );
    server->setAsTimeTracker();
    server->run();
}
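To give an idea of what cursorCache.startTimeoutThread() is for, here is a minimal sketch (plain C++11, not mongos code) of an idle-cursor reaper: a background thread wakes up periodically and forgets cursors that have not been touched for longer than a limit (10 minutes in mongos). The cursor ids and the cleanup action are placeholders; the real cursorCache also has to kill the corresponding cursors on the shards.

#include <chrono>
#include <map>
#include <mutex>
#include <thread>

class IdleCursorReaper {
public:
    typedef std::chrono::steady_clock Clock;

    // Record that a cursor was just used.
    void touch( long long cursorId ) {
        std::lock_guard<std::mutex> lk( _mutex );
        _lastUsed[cursorId] = Clock::now();
    }

    // Runs forever; a real implementation would also have a shutdown flag.
    void run( std::chrono::minutes idleLimit = std::chrono::minutes(10) ) {
        for (;;) {
            std::this_thread::sleep_for( std::chrono::minutes(1) );
            std::lock_guard<std::mutex> lk( _mutex );
            Clock::time_point now = Clock::now();
            for ( std::map<long long, Clock::time_point>::iterator it = _lastUsed.begin();
                  it != _lastUsed.end(); ) {
                if ( now - it->second > idleLimit )
                    it = _lastUsed.erase( it );   // idle too long: drop it (and kill it on the shard)
                else
                    ++it;
            }
        }
    }

private:
    std::mutex _mutex;
    std::map<long long, Clock::time_point> _lastUsed;   // cursorId -> time of last use
};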
Next, the function in mongos that handles socket requests: ShardedMessageHandler::process.
virtual void process( Message& m , AbstractMessagingPort* p , LastError * le) {
    Request r( m , p );
    lastError.startRequest( m , le );
    try {
        r.init();
        r.process();
    }
    // ( the catch blocks handling exceptions are elided in this excerpt )
}
This completes the initialization of mongos. To finish, let's look at a typical request for a connection from the connection pool. As seen in the code above, the typical pattern looks like this:
scoped_ptr<ScopedDbConnection> conn(
        ScopedDbConnection::getInternalScopedDbConnection( _primary.getConnString(), 30.0 ) );
Stepping into getInternalScopedDbConnection:
ScopedDbConnection* ScopedDbConnection::getInternalScopedDbConnection(const string& host,
                                                                      double socketTimeout) {
    return getScopedDbConnection( host, socketTimeout );
}

ScopedDbConnection* ScopedDbConnection::getScopedDbConnection(const string& host,
                                                              double socketTimeout) {
    return new ScopedDbConnection(host, socketTimeout);
}

explicit ScopedDbConnection(const string& host, double socketTimeout = 0)
    : _host(host),
      _conn( pool.get(host, socketTimeout) ),   // note _conn: it is taken from the global connection pool
      _socketTimeout( socketTimeout ) {
    _setSocketTimeout();
}
Next, pool.get:
DBClientBase* DBConnectionPool::get(const string& host, double socketTimeout) {
    DBClientBase * c = _get( host , socketTimeout );    // is there already an idle connection in the pool?
    if ( c ) {
        onHandedOut( c );                                // yes: hand it out, invoking the pool's hooks
        return c;
    }

    string errmsg;
    ConnectionString cs = ConnectionString::parse( host , errmsg );   // no: create a new connection
    c = cs.connect( errmsg, socketTimeout );
    return _finishCreate( host , socketTimeout , c );
}
DBClientBase* DBConnectionPool::_get(const string& ident , double socketTimeout ) {
    scoped_lock L(_mutex);
    PoolForHost& p = _pools[PoolKey(ident,socketTimeout)];
    return p.get( this , socketTimeout );
}
Continuing with PoolForHost::get:
DBClientBase * PoolForHost::get( DBConnectionPool * pool , double socketTimeout ) {
    time_t now = time(0);

    while ( ! _pool.empty() ) {
        StoredConnection sc = _pool.top();
        _pool.pop();

        if ( ! sc.ok( now ) )  {          // connections idle for more than 30 minutes are closed
            pool->onDestroy( sc.conn );   // invoke the hook before closing
            delete sc.conn;
            continue;
        }

        return sc.conn;
    }

    return NULL;
}
Continuing with DBConnectionPool::get->ConnectionString::parse:
ConnectionString ConnectionString::parse( const string& host , string& errmsg ) {
    string::size_type i = host.find( '/' );
    if ( i != string::npos && i != 0) {
        // replica set: "setName/host1,host2,..."
        return ConnectionString( SET , host.substr( i + 1 ) , host.substr( 0 , i ) );
    }

    int numCommas = str::count( host , ',' );

    if( numCommas == 0 )                 // only one machine: MASTER mode
        return ConnectionString( HostAndPort( host ) );

    if ( numCommas == 1 )
        return ConnectionString( PAIR , host );

    if ( numCommas == 2 )
        return ConnectionString( SYNC , host );

    errmsg = (string)"invalid hostname [" + host + "]";
    return ConnectionString(); // INVALID
}
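For reference, the kinds of strings this classifies: "setName/host1,host2" becomes SET, a single host becomes MASTER, two comma-separated hosts become PAIR, and three become SYNC (the config server case). The sketch below re-implements just that classification so the mapping can be tried standalone; it is a simplification of the function above, not the real ConnectionString, and the host names are hypothetical.

#include <algorithm>
#include <iostream>
#include <string>

enum ConnType { MASTER, PAIR, SET, SYNC, INVALID };

// Simplified version of the classification done by ConnectionString::parse.
static ConnType classify( const std::string& host ) {
    std::string::size_type slash = host.find( '/' );
    if ( slash != std::string::npos && slash != 0 )
        return SET;                                   // "setName/host1,host2,..."

    int commas = (int)std::count( host.begin(), host.end(), ',' );
    if ( commas == 0 ) return MASTER;                 // a single server
    if ( commas == 1 ) return PAIR;
    if ( commas == 2 ) return SYNC;                   // e.g. three config servers
    return INVALID;
}

int main() {
    std::cout << classify( "db1.example.com:27017" )                           << "\n";  // MASTER (0)
    std::cout << classify( "rs0/db1.example.com:27017,db2.example.com:27017" ) << "\n";  // SET (2)
    std::cout << classify( "cfg1:27019,cfg2:27019,cfg3:27019" )                << "\n";  // SYNC (3)
    return 0;
}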
ConnectionString( ConnectionType type , const string& s , const string& setName = "" ) {
    _type = type;
    _setName = setName;
    _fillServers( s );       // split s into the individual server addresses
    _finishInit();
}
Continuing with DBConnectionPool::get->ConnectionString::connect:
DBClientBase* ConnectionString::connect( string& errmsg, double socketTimeout ) const {
    switch ( _type ) {                   // create a different kind of client depending on the connection type
    case MASTER: {
        DBClientConnection * c = new DBClientConnection(true);
        c->setSoTimeout( socketTimeout );
        return c;
    }

    case PAIR:
    case SET: {
        DBClientReplicaSet * set = new DBClientReplicaSet( _setName , _servers , socketTimeout );
        return set;
    }

    case SYNC: {
        // TODO , don't copy
        list<HostAndPort> l;
        for ( unsigned i=0; i<_servers.size(); i++ )
            l.push_back( _servers[i] );
        SyncClusterConnection* c = new SyncClusterConnection( l, socketTimeout );
        return c;
    }

    case CUSTOM: {
        // Lock in case other things are modifying this at the same time
        scoped_lock lk( _connectHookMutex );

        // Allow the replacement of connections with other connections - useful for testing.
        // Double-checked lock, since this will never be active during normal operation
        DBClientBase* replacementConn = _connectHook->connect( *this, errmsg, socketTimeout );
        return replacementConn;
    }

    case INVALID:
        throw UserException( 13421 , "trying to connect to invalid ConnectionString" );
        break;
    }

    return 0;
}
DBConnectionPool::get->DBConnectionPool::_finishCreate
DBClientBase* DBConnectionPool::_finishCreate( const string& host , double socketTimeout , DBClientBase* conn ) {
    {
        scoped_lock L(_mutex);
        PoolForHost& p = _pools[PoolKey(host,socketTimeout)];
        // Only the created-connection counter is incremented here; the
        // connection itself is not stored yet. It is put into this PoolForHost
        // later, when the caller releases it.
        p.createdOne( conn );
    }

    onCreate( conn );       // connection-created hook; for pool this is ShardingConnectionHook
    onHandedOut( conn );

    return conn;
}
Now let's look at the hook function invoked here.
void ShardingConnectionHook::onCreate( DBClientBase * conn ) {
    if( !noauth ) {                       // authentication
        string err;
        bool result = conn->auth( "local",
                                  internalSecurity.user,
                                  internalSecurity.pwd,
                                  err,
                                  false );

        if ( conn->type() == ConnectionString::SYNC ) {
            // Connections to the config servers should always have full access.
            conn->setAuthenticationTable(
                    AuthenticationTable::getInternalSecurityAuthenticationTable() );
        }
    }

    // isVersionableCB is true when the connection is in MASTER or SET mode;
    // _shardedConnections is true for shardConnectionPool.
    if ( _shardedConnections && versionManager.isVersionableCB( conn ) ) {
        // We must initialize sharding on all connections, so that we get exceptions if sharding is enabled on
        // the collection.
        BSONObj result;
        // send the setShardVersion command, passing this mongos's serverID to the other end
        bool ok = versionManager.initShardVersionCB( conn, result );

        // assert that we actually successfully setup sharding
    }
}
Next, releasing a connection: ScopedDbConnection::done.
void done() {
    _conn->clearAuthenticationTable();
    pool.release(_host, _conn);
    _conn = 0;
}
void DBConnectionPool::release(const string& host, DBClientBase *c) {
    if ( c->isFailed() ) {
        onDestroy( c );
        delete c;
        return;
    }
    scoped_lock L(_mutex);
    _pools[PoolKey(host,c->getSoTimeout())].done(this,c);   // put the now idle connection back into the pool
}
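Putting get and release together, the sketch below is a stripped-down, single-host version of the pattern described in this article: get() prefers an idle cached connection, evicting any that have been idle too long; release() either destroys a failed connection or puts the connection back for reuse. The FakeConnection type and the 30-minute limit mirror the excerpts above; everything else, including the host name, is illustrative only.

#include <ctime>
#include <stack>
#include <string>

struct FakeConnection {
    std::string host;
    bool failed;
    FakeConnection( const std::string& h ) : host(h), failed(false) {}
};

class MiniPool {
public:
    explicit MiniPool( const std::string& host ) : _host(host) {}

    // Hand out an idle connection if one exists, otherwise create a new one.
    FakeConnection* get() {
        time_t now = time(0);
        while ( !_idle.empty() ) {
            Stored sc = _idle.top();
            _idle.pop();
            if ( now - sc.when > 30 * 60 ) {    // idle for more than 30 minutes: close it
                delete sc.conn;
                continue;
            }
            return sc.conn;
        }
        return new FakeConnection( _host );      // nothing usable cached: "connect"
    }

    // Called when the caller is done with the connection (ScopedDbConnection::done).
    void release( FakeConnection* c ) {
        if ( c->failed ) {                       // broken connections are never reused
            delete c;
            return;
        }
        Stored sc;
        sc.conn = c;
        sc.when = time(0);
        _idle.push( sc );                        // keep it for the next get()
    }

private:
    struct Stored {
        FakeConnection* conn;
        time_t when;                             // when the connection became idle
    };
    std::string _host;
    std::stack<Stored> _idle;
    // A real pool would also delete the idle connections in its destructor and
    // guard these operations with a mutex, as DBConnectionPool does.
};

int main() {
    MiniPool pool( "shard1.example.com:27018" ); // hypothetical host
    FakeConnection* c = pool.get();              // creates a new connection
    pool.release( c );                           // back into the idle stack
    FakeConnection* c2 = pool.get();             // reuses the same connection
    pool.release( c2 );
    return 0;
}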
At this point the connection has been returned to the pool, and that concludes this article.
Original article: MongoDB source code analysis (19): mongos initialization and connection pool allocation and recycling. Author: yhjj0108, 楊浩.