Swarmkit筆記(9)——manager

來源:互聯網
上載者:User
這是一篇較早建立的文章,其中的資訊可能已經有所發展或改變。

Node.runManager()函數會啟動一個manager:

// runManager runs a manager on this node while the node holds the manager
// role. Each loop iteration waits for the manager role, creates and starts a
// manager, waits for the role to change to agent, and then waits for the
// manager to finish. It returns when ctx ends or when manager.New fails.
func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, ready chan struct{}) error {
    for {
        n.waitRole(ctx, ca.ManagerRole)
        if ctx.Err() != nil {
            return ctx.Err()
        }
        // The error from Select is discarded; presumably remoteAddr stays
        // empty when no other remote is known — TODO confirm.
        remoteAddr, _ := n.remotes.Select(n.nodeID)
        m, err := manager.New(&manager.Config{
            ForceNewCluster: n.config.ForceNewCluster,
            ProtoAddr: map[string]string{
                "tcp":  n.config.ListenRemoteAPI,
                "unix": n.config.ListenControlAPI,
            },
            AdvertiseAddr:  n.config.AdvertiseRemoteAPI,
            SecurityConfig: securityConfig,
            ExternalCAs:    n.config.ExternalCAs,
            JoinRaft:       remoteAddr.Addr,
            StateDir:       n.config.StateDir,
            HeartbeatTick:  n.config.HeartbeatTick,
            ElectionTick:   n.config.ElectionTick,
        })
        if err != nil {
            return err
        }
        // done is closed once m.Run has returned.
        done := make(chan struct{})
        go func() {
            m.Run(context.Background()) // todo: store error
            close(done)
        }()
        n.Lock()
        n.manager = m
        n.Unlock()
        // connCtx scopes the local control connection to this iteration; it
        // is cancelled below, after the manager has stopped.
        connCtx, connCancel := context.WithCancel(ctx)
        go n.initManagerConnection(connCtx, ready)
        // this happens only on initial start
        if ready != nil {
            go func(ready chan struct{}) {
                select {
                case <-ready:
                    n.remotes.Observe(api.Peer{NodeID: n.nodeID, Addr: n.config.ListenRemoteAPI}, picker.DefaultObservationWeight)
                case <-connCtx.Done():
                }
            }(ready)
            // Clear ready so later iterations skip this block.
            ready = nil
        }
        n.waitRole(ctx, ca.AgentRole)
        n.Lock()
        n.manager = nil
        n.Unlock()
        // Wait for the manager to finish. If ctx ends first, stop the manager
        // explicitly and still wait for Run to return.
        select {
        case <-done:
        case <-ctx.Done():
            err = ctx.Err()
            m.Stop(context.Background())
            <-done
        }
        connCancel()
        if err != nil {
            return err
        }
    }
}

(1)

        // Wait for the manager role; give up if ctx ended in the meantime.
        n.waitRole(ctx, ca.ManagerRole)
        if ctx.Err() != nil {
            return ctx.Err()
        }

首先runManager()函數會阻塞在waitRole()函數。一旦獲得manager角色,就會往下執行。

(2)

        // The error from Select is discarded; remoteAddr.Addr is passed on
        // as the JoinRaft address below.
        remoteAddr, _ := n.remotes.Select(n.nodeID)
        m, err := manager.New(&manager.Config{
            ForceNewCluster: n.config.ForceNewCluster,
            ProtoAddr: map[string]string{
                "tcp":  n.config.ListenRemoteAPI,
                "unix": n.config.ListenControlAPI,
            },
            AdvertiseAddr:  n.config.AdvertiseRemoteAPI,
            SecurityConfig: securityConfig,
            ExternalCAs:    n.config.ExternalCAs,
            JoinRaft:       remoteAddr.Addr,
            StateDir:       n.config.StateDir,
            HeartbeatTick:  n.config.HeartbeatTick,
            ElectionTick:   n.config.ElectionTick,
        })
        if err != nil {
            return err
        }
        // done is closed once m.Run has returned.
        done := make(chan struct{})
        go func() {
            m.Run(context.Background()) // todo: store error
            close(done)
        }()
        // Publish the running manager on the node under its lock.
        n.Lock()
        n.manager = m
        n.Unlock()

a)remoteAddr, _ := n.remotes.Select(n.nodeID) 的作用是從當前cluster的manager中(當然需要排除掉當前node)選出一個leader,賦給remoteAddr。如果當前node是cluster中的第一個manager,則remoteAddr就是一個“空的”值:{NodeID: "", Addr: ""}。
b)在使用manager.New()函數建立manager時,要注意n.config.AdvertiseRemoteAPI是一直為""的。 manager.New()最後會返回一個Manager結構體:

func New(config *Config) (*Manager, error) {    ......    m := &Manager{        config:      config,        listeners:   listeners,        caserver:    ca.NewServer(RaftNode.MemoryStore(), config.SecurityConfig),        Dispatcher:  dispatcher.New(RaftNode, dispatcherConfig),        server:      grpc.NewServer(opts...),        localserver: grpc.NewServer(opts...),        RaftNode:    RaftNode,        started:     make(chan struct{}),        stopped:     make(chan struct{}),    }    return m, nil}

其中的listeners包含監聽listen-remote-api(tcp)和listen-control-api(unix)的兩個socket。

c)m.Run()是實際運行manager的函數,連作者自己都覺得複雜(“This function is *way* too complex.”)。可以把這個函數邏輯分成下面幾塊:
i)如果當前manager被選為leader,就做一大堆初始化的動作,包括為scheduler、allocator等分配資源,啟動goroutine等等;如果不是leader,就做一大堆收尾工作,停掉goroutine,釋放資源。
ii)接下來對manager.localserver和manager.server做一大堆設定,主要是authentication和proxy方面;然後二者分別監聽manager.listeners中的Unix和TCP socket,處理相應的資料。

(3)

        // Derive a cancellable child context for the local control-API
        // connection and start tracking that connection in the background.
        connCtx, connCancel := context.WithCancel(ctx)
        go n.initManagerConnection(connCtx, ready)

其中Node.initManagerConnection()實現如下:

func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error {    opts := []grpc.DialOption{}    insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})    opts = append(opts, grpc.WithTransportCredentials(insecureCreds))    // Using listen address instead of advertised address because this is a    // local connection.    addr := n.config.ListenControlAPI    opts = append(opts, grpc.WithDialer(        func(addr string, timeout time.Duration) (net.Conn, error) {            return net.DialTimeout("unix", addr, timeout)        }))    conn, err := grpc.Dial(addr, opts...)    if err != nil {        return err    }    state := grpc.Idle    for {        s, err := conn.WaitForStateChange(ctx, state)        if err != nil {            n.setControlSocket(nil)            return err        }        if s == grpc.Ready {            n.setControlSocket(conn)            if ready != nil {                close(ready)                ready = nil            }        } else if state == grpc.Shutdown {            n.setControlSocket(nil)        }        state = s    }}

功能就是建立一個與本地listen-control-api(unix) socket的連線,用來監控node的狀態。

(4)把當前node也加入remotes的監控列表中:

    // this happens only on initial start
    if ready != nil {
        go func(ready chan struct{}) {
            select {
            case <-ready:
                // ready fired: record this node in remotes under its own
                // listen address.
                n.remotes.Observe(api.Peer{NodeID: n.nodeID, Addr: n.config.ListenRemoteAPI}, picker.DefaultObservationWeight)
            case <-connCtx.Done():
                // connCtx ended before ready fired; nothing to record.
            }
        }(ready)
        // Clear ready so this block does not run again.
        ready = nil
    }

(5)阻塞在下列代碼,等待角色變化:

n.waitRole(ctx, ca.AgentRole)

 

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.