Swarmkit筆記(6)——Agent運行

來源:互聯網
上載者:User
這是一篇先前建立的文章,其中的資訊可能已經有所發展或是發生改變。

Node.runAgent()函數實現如下:

func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error {    var manager api.Peer    select {    case <-ctx.Done():    case manager = <-n.remotes.WaitSelect(ctx):    }    if ctx.Err() != nil {        return ctx.Err()    }    picker := picker.NewPicker(n.remotes, manager.Addr)    conn, err := grpc.Dial(manager.Addr,        grpc.WithPicker(picker),        grpc.WithTransportCredentials(creds),        grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))    if err != nil {        return err    }    agent, err := New(&Config{        Hostname:         n.config.Hostname,        Managers:         n.remotes,        Executor:         n.config.Executor,        DB:               db,        Conn:             conn,        Picker:           picker,        NotifyRoleChange: n.roleChangeReq,    })    if err != nil {        return err    }    if err := agent.Start(ctx); err != nil {        return err    }    n.Lock()    n.agent = agent    n.Unlock()    defer func() {        n.Lock()        n.agent = nil        n.Unlock()    }()    go func() {        <-agent.Ready()        close(ready)    }()    // todo: manually call stop on context cancellation?    return agent.Err(context.Background())}

上面函數解釋如下:

(1)case manager = <-n.remotes.WaitSelect(ctx):首先獲得manager
(2)接下來調用grpc.Dial()去連接這個manager

    // Excerpt from runAgent: build a picker from the node's remotes and the
    // selected manager's address, then dial that manager over gRPC with the
    // supplied transport credentials. maxSessionFailureBackoff is passed as
    // the maximum dial backoff delay.
    picker := picker.NewPicker(n.remotes, manager.Addr)
    conn, err := grpc.Dial(manager.Addr,
        grpc.WithPicker(picker),
        grpc.WithTransportCredentials(creds),
        grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
    if err != nil {
        // A dial failure aborts runAgent.
        return err
    }

(3)產生並運行一個Agent

    // Excerpt from runAgent: assemble the agent's Config from the node's
    // settings (hostname, executor, remotes, role-change channel), the task
    // database, and the connection and picker created by the dial step.
    agent, err := New(&Config{
        Hostname:         n.config.Hostname,
        Managers:         n.remotes,
        Executor:         n.config.Executor,
        DB:               db,
        Conn:             conn,
        Picker:           picker,
        NotifyRoleChange: n.roleChangeReq,
    })
    if err != nil {
        return err
    }
    // Start the agent; a failure here aborts runAgent.
    if err := agent.Start(ctx); err != nil {
        return err
    }

關於Agent結構體定義:

// Agent implements the primary node functionality for a member of a swarm
// cluster. The primary functionality is to run and report on the status of
// tasks assigned to the node.
type Agent struct {
	// config holds the settings the agent was created with.
	config *Config

	// The latest node object state from manager
	// for this node known to the agent.
	node *api.Node

	keys []*api.EncryptionKey

	// sessionq carries operations to be executed against the current session;
	// the run loop services it and replies on each operation's response
	// channel.
	sessionq chan sessionOperation
	// worker is initialised by run, which then forwards task assignments
	// received from the session to it.
	worker Worker

	started chan struct{} // presumably closed when the agent is started — TODO confirm
	ready   chan struct{} // closed by run the first time a session registers
	stopped chan struct{} // requests shutdown
	closed  chan struct{} // only closed in run
	err     error         // read only after closed is closed
}

其中Config結構體定義:

// Config provides values for an Agent.
type Config struct {
	// Hostname is the name of the host for the agent instance.
	Hostname string

	// Managers provides the manager backend used by the agent. It will be
	// updated with managers weights as observed by the agent.
	Managers picker.Remotes

	// Conn specifies the client connection Agent will use.
	Conn *grpc.ClientConn

	// Picker is the picker used by Conn.
	// TODO(aaronl): This is only part of the config to allow resetting the
	// GRPC connection. This should be refactored to address the coupling
	// between Conn and Picker.
	Picker *picker.Picker

	// Executor specifies the executor to use for the agent.
	Executor exec.Executor

	// DB used for task storage. Must be open for the lifetime of the agent.
	DB *bolt.DB

	// NotifyRoleChange channel receives new roles from session messages.
	NotifyRoleChange chan<- api.NodeRole
}

注釋都很清楚,不必贅述。

Agent.Start()會調到Agent.run(),實現如下:

func (a *Agent) run(ctx context.Context) {    ctx, cancel := context.WithCancel(ctx)    defer cancel()    defer close(a.closed) // full shutdown.    ctx = log.WithLogger(ctx, log.G(ctx).WithField("module", "agent"))    log.G(ctx).Debugf("(*Agent).run")    defer log.G(ctx).Debugf("(*Agent).run exited")    var (        backoff    time.Duration        session    = newSession(ctx, a, backoff) // start the initial session        registered = session.registered        ready      = a.ready // first session ready        sessionq   chan sessionOperation    )    if err := a.worker.Init(ctx); err != nil {        log.G(ctx).WithError(err).Error("worker initialization failed")        a.err = err        return // fatal?    }    // setup a reliable reporter to call back to us.    reporter := newStatusReporter(ctx, a)    defer reporter.Close()    a.worker.Listen(ctx, reporter)    for {        select {        case operation := <-sessionq:            operation.response <- operation.fn(session)        case msg := <-session.tasks:            if err := a.worker.Assign(ctx, msg.Tasks); err != nil {                log.G(ctx).WithError(err).Error("task assignment failed")            }        case msg := <-session.messages:            if err := a.handleSessionMessage(ctx, msg); err != nil {                log.G(ctx).WithError(err).Error("session message handler failed")            }        case <-registered:            log.G(ctx).Debugln("agent: registered")            if ready != nil {                close(ready)            }            ready = nil            registered = nil // we only care about this once per session            backoff = 0      // reset backoff            sessionq = a.sessionq        case err := <-session.errs:            // TODO(stevvooe): This may actually block if a session is closed            // but no error was sent. Session.close must only be called here            // for this to work.            
if err != nil {                log.G(ctx).WithError(err).Error("agent: session failed")                backoff = initialSessionFailureBackoff + 2*backoff                if backoff > maxSessionFailureBackoff {                    backoff = maxSessionFailureBackoff                }            }            if err := session.close(); err != nil {                log.G(ctx).WithError(err).Error("agent: closing session failed")            }            sessionq = nil            // if we're here before <-registered, do nothing for that event            registered = nil            // Bounce the connection.            if a.config.Picker != nil {                a.config.Picker.Reset()            }        case <-session.closed:            log.G(ctx).Debugf("agent: rebuild session")            // select a session registration delay from backoff range.            delay := time.Duration(rand.Int63n(int64(backoff)))            session = newSession(ctx, a, delay)            registered = session.registered            sessionq = a.sessionq        case <-a.stopped:            // TODO(stevvooe): Wait on shutdown and cleanup. May need to pump            // this loop a few times.            return        case <-ctx.Done():            if a.err == nil {                a.err = ctx.Err()            }            return        }    }}

其中重要的是session這個概念,通過“session = newSession(ctx, a, backoff)”這行代碼將session和Agent關聯起來。

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.