本文建立已有一段時間,其中的資訊可能已經有所發展或是發生改變。
Node.runManager()函數會啟動一個manager:
// runManager runs a manager on this node whenever the node holds the manager
// role, looping so that a fresh manager is created each time the role is
// regained.
//
// Each iteration waits for ca.ManagerRole, builds a manager with manager.New,
// runs it in its own goroutine, publishes it in n.manager, and starts
// initManagerConnection. It then waits for ca.AgentRole, clears n.manager, and
// waits for the manager goroutine to finish, stopping the manager first if ctx
// has been cancelled.
//
// A non-nil ready channel is only used by the first iteration; it is set to
// nil afterwards. The function returns ctx.Err() once ctx is done, or the
// error returned by manager.New.
func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, ready chan struct{}) error {
	for {
		// Presumably blocks until the node has the manager role or ctx is
		// done; the ctx.Err() check below distinguishes the two outcomes.
		n.waitRole(ctx, ca.ManagerRole)
		if ctx.Err() != nil {
			return ctx.Err()
		}
		// The error from Select is discarded and remoteAddr.Addr is used as
		// JoinRaft regardless.
		// NOTE(review): presumably an empty Addr means there is no other
		// manager to join — confirm against the picker implementation.
		remoteAddr, _ := n.remotes.Select(n.nodeID)
		m, err := manager.New(&manager.Config{
			ForceNewCluster: n.config.ForceNewCluster,
			ProtoAddr: map[string]string{
				"tcp":  n.config.ListenRemoteAPI,
				"unix": n.config.ListenControlAPI,
			},
			AdvertiseAddr:  n.config.AdvertiseRemoteAPI,
			SecurityConfig: securityConfig,
			ExternalCAs:    n.config.ExternalCAs,
			JoinRaft:       remoteAddr.Addr,
			StateDir:       n.config.StateDir,
			HeartbeatTick:  n.config.HeartbeatTick,
			ElectionTick:   n.config.ElectionTick,
		})
		if err != nil {
			return err
		}
		// done is closed when m.Run returns. Run gets a background context
		// rather than ctx; the manager is stopped explicitly through m.Stop
		// further down when ctx is cancelled.
		done := make(chan struct{})
		go func() {
			m.Run(context.Background()) // todo: store error
			close(done)
		}()
		n.Lock()
		n.manager = m
		n.Unlock()
		// connCtx scopes the goroutines started for this manager instance;
		// connCancel is called at the end of the iteration.
		connCtx, connCancel := context.WithCancel(ctx)
		go n.initManagerConnection(connCtx, ready)
		// this happens only on initial start
		if ready != nil {
			// Once ready fires, register this node itself with n.remotes;
			// give up if the connection context ends first.
			go func(ready chan struct{}) {
				select {
				case <-ready:
					n.remotes.Observe(api.Peer{NodeID: n.nodeID, Addr: n.config.ListenRemoteAPI}, picker.DefaultObservationWeight)
				case <-connCtx.Done():
				}
			}(ready)
			// Later iterations pass a nil ready to initManagerConnection and
			// skip this block.
			ready = nil
		}
		// Presumably blocks until the node is demoted to agent or ctx is done.
		n.waitRole(ctx, ca.AgentRole)
		n.Lock()
		n.manager = nil
		n.Unlock()
		// Wait for the manager goroutine to exit. If ctx is done, record its
		// error, stop the manager, and still wait for Run to return.
		select {
		case <-done:
		case <-ctx.Done():
			err = ctx.Err()
			m.Stop(context.Background())
			<-done
		}
		connCancel()
		// err can only be non-nil here when it was set from ctx.Err() above.
		if err != nil {
			return err
		}
	}
}
(1)
n.waitRole(ctx, ca.ManagerRole) if ctx.Err() != nil { return ctx.Err() }
首先runManager()函數會阻塞在waitRole()函數。一旦獲得manager角色,就會往下執行。
(2)
remoteAddr, _ := n.remotes.Select(n.nodeID) m, err := manager.New(&manager.Config{ ForceNewCluster: n.config.ForceNewCluster, ProtoAddr: map[string]string{ "tcp": n.config.ListenRemoteAPI, "unix": n.config.ListenControlAPI, }, AdvertiseAddr: n.config.AdvertiseRemoteAPI, SecurityConfig: securityConfig, ExternalCAs: n.config.ExternalCAs, JoinRaft: remoteAddr.Addr, StateDir: n.config.StateDir, HeartbeatTick: n.config.HeartbeatTick, ElectionTick: n.config.ElectionTick, }) if err != nil { return err } done := make(chan struct{}) go func() { m.Run(context.Background()) // todo: store error close(done) }() n.Lock() n.manager = m n.Unlock()
a)remoteAddr, _ := n.remotes.Select(n.nodeID)作用是從當前cluster的其他manager中(也就是排除掉當前node)選出一個manager,把它的位址資訊賦給remoteAddr,之後用作JoinRaft的目標。如果當前node是cluster中的第一個manager,則remoteAddr就是一個“空的”值:{NodeID: "", Addr: ""};
b)在使用manager.New()函數建立manager時,要注意n.config.AdvertiseRemoteAPI是一直為""的。 manager.New()最後會返回一個Manager結構體:
// New assembles a Manager from config and returns it.
//
// The "......" line below is an elision in this excerpt, not Go code. The
// values listeners, RaftNode, dispatcherConfig and opts used afterwards are
// presumably prepared in that omitted part — confirm against the full source.
func New(config *Config) (*Manager, error) {
	......
	m := &Manager{
		config:    config,
		listeners: listeners,
		caserver:  ca.NewServer(RaftNode.MemoryStore(), config.SecurityConfig),
		Dispatcher: dispatcher.New(RaftNode, dispatcherConfig),
		// server and localserver are two separate gRPC servers built from
		// the same option list.
		server:      grpc.NewServer(opts...),
		localserver: grpc.NewServer(opts...),
		RaftNode:    RaftNode,
		started:     make(chan struct{}),
		stopped:     make(chan struct{}),
	}
	return m, nil
}
其中的listeners包含監聽listen-remote-api(tcp)和listen-control-api(unix)的兩個socket。
c)m.Run()是實際運行manager的函數,連作者自己都覺得複雜(“This function is *way* too complex.”)。可以把這個函數邏輯分成下面幾塊:
i)如果當前manager被選為leader,就做一大堆初始化的動作,包括為scheduler,allocator等分配資源,啟動goroutine等等;如果不是leader,就做一大堆收尾工作,停掉goroutine,釋放資源。
ii)接下來對manager.localserver和manager.server做一大堆設定,主要是authentication和proxy的方面;然後二者分別監聽manager.listeners中的Unix和TCP socket,處理相應的資料。
(3)
connCtx, connCancel := context.WithCancel(ctx) go n.initManagerConnection(connCtx, ready)
其中Node.initManagerConnection()實現如下:
func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error { opts := []grpc.DialOption{} insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}) opts = append(opts, grpc.WithTransportCredentials(insecureCreds)) // Using listen address instead of advertised address because this is a // local connection. addr := n.config.ListenControlAPI opts = append(opts, grpc.WithDialer( func(addr string, timeout time.Duration) (net.Conn, error) { return net.DialTimeout("unix", addr, timeout) })) conn, err := grpc.Dial(addr, opts...) if err != nil { return err } state := grpc.Idle for { s, err := conn.WaitForStateChange(ctx, state) if err != nil { n.setControlSocket(nil) return err } if s == grpc.Ready { n.setControlSocket(conn) if ready != nil { close(ready) ready = nil } } else if state == grpc.Shutdown { n.setControlSocket(nil) } state = s }}
功能就是建立一條到本地listen-control-api(unix)socket的連線,用來監控node的狀態。
(4)把當前node也加入remotes的監控列表中:
// this happens only on initial start if ready != nil { go func(ready chan struct{}) { select { case <-ready: n.remotes.Observe(api.Peer{NodeID: n.nodeID, Addr: n.config.ListenRemoteAPI}, picker.DefaultObservationWeight) case <-connCtx.Done(): } }(ready) ready = nil }
(5)阻塞在下列代碼,等待角色變化:
n.waitRole(ctx, ca.AgentRole)