This article is a repost; the information in it may have evolved or changed since it was originally written.
Node.runAgent()
The function is implemented as follows:
func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error {
	var manager api.Peer
	select {
	case <-ctx.Done():
	case manager = <-n.remotes.WaitSelect(ctx):
	}
	if ctx.Err() != nil {
		return ctx.Err()
	}
	picker := picker.NewPicker(n.remotes, manager.Addr)
	conn, err := grpc.Dial(manager.Addr,
		grpc.WithPicker(picker),
		grpc.WithTransportCredentials(creds),
		grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
	if err != nil {
		return err
	}

	agent, err := New(&Config{
		Hostname:         n.config.Hostname,
		Managers:         n.remotes,
		Executor:         n.config.Executor,
		DB:               db,
		Conn:             conn,
		Picker:           picker,
		NotifyRoleChange: n.roleChangeReq,
	})
	if err != nil {
		return err
	}
	if err := agent.Start(ctx); err != nil {
		return err
	}

	n.Lock()
	n.agent = agent
	n.Unlock()

	defer func() {
		n.Lock()
		n.agent = nil
		n.Unlock()
	}()

	go func() {
		<-agent.Ready()
		close(ready)
	}()

	// todo: manually call stop on context cancellation?

	return agent.Err(context.Background())
}
The above functions are explained as follows:
(1) `case manager = <-n.remotes.WaitSelect(ctx)`: first obtains a manager;
(2) Next, `grpc.Dial()` is called to connect to this manager:
picker := picker.NewPicker(n.remotes, manager.Addr)
conn, err := grpc.Dial(manager.Addr,
	grpc.WithPicker(picker),
	grpc.WithTransportCredentials(creds),
	grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
if err != nil {
	return err
}
(3) Build and run an Agent:
agent, err := New(&Config{
	Hostname:         n.config.Hostname,
	Managers:         n.remotes,
	Executor:         n.config.Executor,
	DB:               db,
	Conn:             conn,
	Picker:           picker,
	NotifyRoleChange: n.roleChangeReq,
})
if err != nil {
	return err
}
if err := agent.Start(ctx); err != nil {
	return err
}
The `Agent` struct is defined as follows:
// Agent implements the primary node functionality for a member of a swarm
// cluster. The primary functionality is to run and report on the status of
// tasks assigned to the node.
type Agent struct {
	config *Config

	// The latest node object state from manager
	// for this node known to the agent.
	node *api.Node

	keys []*api.EncryptionKey

	sessionq chan sessionOperation
	worker   Worker

	started chan struct{}
	ready   chan struct{}
	stopped chan struct{} // requests shutdown
	closed  chan struct{} // only closed in run
	err     error         // read only after closed is closed
}
where the `Config` struct is defined as:
// Config provides values for an Agent.
type Config struct {
	// Hostname the name of host for agent instance.
	Hostname string

	// Managers provides the manager backend used by the agent. It will be
	// updated with managers weights as observed by the agent.
	Managers picker.Remotes

	// Conn specifies the client connection Agent will use.
	Conn *grpc.ClientConn

	// Picker is the picker used by Conn.
	// TODO(aaronl): This is only part of the config to allow resetting the
	// GRPC connection. This should be refactored to address the coupling
	// between Conn and Picker.
	Picker *picker.Picker

	// Executor specifies the executor to use for the agent.
	Executor exec.Executor

	// DB used for task storage. Must be open for the lifetime of the agent.
	DB *bolt.DB

	// NotifyRoleChange channel receives new roles from session messages.
	NotifyRoleChange chan<- api.NodeRole
}
The comments are clear enough that no further explanation is needed.
`Agent.Start()` in turn calls `Agent.run()`, which is implemented as follows:
func (a *Agent) run(ctx context.Context) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	defer close(a.closed) // full shutdown.

	ctx = log.WithLogger(ctx, log.G(ctx).WithField("module", "agent"))

	log.G(ctx).Debugf("(*Agent).run")
	defer log.G(ctx).Debugf("(*Agent).run exited")

	var (
		backoff    time.Duration
		session    = newSession(ctx, a, backoff) // start the initial session
		registered = session.registered
		ready      = a.ready // first session ready
		sessionq   chan sessionOperation
	)

	if err := a.worker.Init(ctx); err != nil {
		log.G(ctx).WithError(err).Error("worker initialization failed")
		a.err = err
		return // fatal?
	}

	// setup a reliable reporter to call back to us.
	reporter := newStatusReporter(ctx, a)
	defer reporter.Close()

	a.worker.Listen(ctx, reporter)

	for {
		select {
		case operation := <-sessionq:
			operation.response <- operation.fn(session)
		case msg := <-session.tasks:
			if err := a.worker.Assign(ctx, msg.Tasks); err != nil {
				log.G(ctx).WithError(err).Error("task assignment failed")
			}
		case msg := <-session.messages:
			if err := a.handleSessionMessage(ctx, msg); err != nil {
				log.G(ctx).WithError(err).Error("session message handler failed")
			}
		case <-registered:
			log.G(ctx).Debugln("agent: registered")
			if ready != nil {
				close(ready)
			}
			ready = nil
			registered = nil // we only care about this once per session
			backoff = 0      // reset backoff
			sessionq = a.sessionq
		case err := <-session.errs:
			// TODO(stevvooe): This may actually block if a session is closed
			// but no error was sent. Session.close must only be called here
			// for this to work.
			if err != nil {
				log.G(ctx).WithError(err).Error("agent: session failed")
				backoff = initialSessionFailureBackoff + 2*backoff
				if backoff > maxSessionFailureBackoff {
					backoff = maxSessionFailureBackoff
				}
			}

			if err := session.close(); err != nil {
				log.G(ctx).WithError(err).Error("agent: closing session failed")
			}
			sessionq = nil
			// if we're here before <-registered, do nothing for that event
			registered = nil

			// Bounce the connection.
			if a.config.Picker != nil {
				a.config.Picker.Reset()
			}
		case <-session.closed:
			log.G(ctx).Debugf("agent: rebuild session")

			// select a session registration delay from backoff range.
			delay := time.Duration(rand.Int63n(int64(backoff)))
			session = newSession(ctx, a, delay)
			registered = session.registered
			sessionq = a.sessionq
		case <-a.stopped:
			// TODO(stevvooe): Wait on shutdown and cleanup. May need to pump
			// this loop a few times.
			return
		case <-ctx.Done():
			if a.err == nil {
				a.err = ctx.Err()
			}
			return
		}
	}
}
The important concept here is the session: the line `session = newSession(ctx, a, backoff)` is what associates a session with the Agent.