Linux核心–網路通訊協定棧深入分析(五)–通訊端的綁定、監聽、串連和斷開

來源:互聯網
上載者:User

本文分析基於Linux Kernel 3.2.1

原創作品,轉載請標明http://blog.csdn.net/yming0221/article/details/7996528

更多請查看專欄http://blog.csdn.net/column/details/linux-kernel-net.html

作者:閆明

1、通訊端的綁定

建立完通訊端後,伺服器端會在應用程式層使用bind函數進行通訊端的綁定,這時會產生系統調用,由核心函數sys_bind完成通訊端的綁定。

系統調用函數的具體實現

/*
 * bind() system call.
 *
 * @fd:      file descriptor of the socket to bind
 * @umyaddr: user-space pointer to the address to bind to
 * @addrlen: length of that address
 *
 * Looks up the socket behind @fd, copies the address into a kernel-side
 * sockaddr_storage, runs the security hook and finally calls the socket's
 * own ops->bind().  Returns the error code of the first step that fails,
 * otherwise the result of ops->bind().
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	/* On failure sockfd_lookup_light() returns NULL and stores the error in err. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
		if (err >= 0) {
			err = security_socket_bind(sock,
						   (struct sockaddr *)&address,
						   addrlen);
			if (!err)
				err = sock->ops->bind(sock,
						      (struct sockaddr *)
						      &address, addrlen);
		}
		/* Balance the file reference taken by the lookup. */
		fput_light(sock->file, fput_needed);
	}
	return err;
}

首先調用函數sockfd_lookup_light()函數通過檔案描述符來尋找對應的通訊端sock。

/*
 * Map a file descriptor to its struct socket.
 *
 * @fd:          file descriptor to look up
 * @err:         out: error code; preset to -EBADF, and also passed to
 *               sock_from_file()
 * @fput_needed: out: filled in by fget_light(); the caller passes it back to
 *               fput_light() when it is done with the socket
 *
 * Returns the socket on success.  On failure returns NULL with *err set; the
 * file reference obtained here has already been dropped in that case.
 */
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
	struct file *file;
	struct socket *sock;

	*err = -EBADF;
	file = fget_light(fd, fput_needed);
	if (file) {
		sock = sock_from_file(file, err);
		if (sock)
			return sock;
		/* The file is not a socket: release it before failing. */
		fput_light(file, *fput_needed);
	}
	return NULL;
}

上面函數中先調用fget_light函數通過檔案描述符返回對應的檔案結構,然後調用函數sock_from_file函數返回該檔案對應的通訊端結構體地址,它儲存在file->private_data屬性中。

再回到sys_bind函數,在返回了對應的通訊端結構之後,調用move_addr_to_kernel將使用者地址空間的socket地址拷貝到核心空間。

然後調用INET協議族的操作集中bind函數inet_bind函數將socket地址(核心空間)和socket綁定。

/*
 * bind() implementation of the INET protocol family.
 *
 * @sock:     socket being bound
 * @uaddr:    kernel-space copy of the address (treated as struct sockaddr_in)
 * @addr_len: length of @uaddr
 *
 * If the protocol provides its own bind handler it is used directly.
 * Otherwise the address type and the privilege for ports below PROT_SOCK are
 * checked, and then, under the socket lock, the address is stored and the
 * port is reserved through sk_prot->get_port().
 *
 * Returns 0 on success, or the negative errno set just before the failing
 * check (-EINVAL, -EADDRNOTAVAIL, -EACCES, -EADDRINUSE), or whatever the
 * protocol-specific bind handler returned.
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct inet_sock *inet = inet_sk(sk);
	unsigned short snum;
	int chk_addr_ret;
	int err;

	/* If the socket type (e.g. RAW) has its own bind function, use it. */
	if (sk->sk_prot->bind) {
		err = sk->sk_prot->bind(sk, uaddr, addr_len);
		goto out;
	}
	err = -EINVAL;
	.....................        /* address validity checks (elided in the article) */

	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);

	/* Not specified by any standard per-se, however it breaks too
	 * many applications when removed.  It is unfortunate since
	 * allowing applications to make a non-local bind solves
	 * several problems with systems using dynamic addressing.
	 * (ie. your servers still start up even if your ISDN link
	 *  is temporarily down)
	 */
	err = -EADDRNOTAVAIL;
	if (!sysctl_ip_nonlocal_bind &&
	    !(inet->freebind || inet->transparent) &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
	    chk_addr_ret != RTN_LOCAL &&
	    chk_addr_ret != RTN_MULTICAST &&
	    chk_addr_ret != RTN_BROADCAST)
		goto out;

	snum = ntohs(addr->sin_port);
	err = -EACCES;
	/* Ports below PROT_SOCK require CAP_NET_BIND_SERVICE. */
	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
		goto out;

	/*      We keep a pair of addresses. rcv_saddr is the one
	 *      used by hash lookups, and saddr is used for transmit.
	 *
	 *      In the BSD API these are the same except where it
	 *      would be illegal to use them (multicast/broadcast) in
	 *      which case the sending device address is used.
	 */
	lock_sock(sk);

	/* Check these errors (active socket, double bind). */
	err = -EINVAL;
	/* Fail if the socket is NOT in TCP_CLOSE state or a local port is already bound. */
	if (sk->sk_state != TCP_CLOSE || inet->inet_num)
		goto out_release_sock;

	/* Set the source address. */
	inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
		inet->inet_saddr = 0;  /* Use device */

	/* Make sure we are allowed to bind here. */
	if (sk->sk_prot->get_port(sk, snum)) {
		/* Port not available: undo the address assignment above. */
		inet->inet_saddr = inet->inet_rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
	}

	if (inet->inet_rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	/* Set the source port number, marking the port as in use. */
	inet->inet_sport = htons(inet->inet_num);
	inet->inet_daddr = 0;
	inet->inet_dport = 0;
	sk_dst_reset(sk);
	err = 0;
out_release_sock:
	release_sock(sk);
out:
	return err;
}

這樣通訊端綁定結束。

2、通訊端的監聽

/*
 * listen() system call.
 *
 * @fd:      file descriptor of the socket
 * @backlog: backlog value requested by the caller
 *
 * Looks up the socket, runs the security hook and calls the socket's
 * ops->listen().  Returns the lookup error, the security hook's error, or
 * the result of ops->listen().
 */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	struct socket *sock;
	int err, fput_needed;
	int somaxconn;	/* used only by the code the article elided below */

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		......................	/* code elided in the article */

		err = security_socket_listen(sock, backlog);
		if (!err)
			err = sock->ops->listen(sock, backlog);

		fput_light(sock->file, fput_needed);
	}
	return err;
}

該函數先通過檔案描述符尋找到對應的通訊端結構,然後調用inet_listen函數將通訊端sk的狀態設定為TCP_LISTEN。

/*
 * listen() implementation of the INET protocol family.
 *
 * @sock:    socket to put into the listening state
 * @backlog: value stored in sk->sk_max_ack_backlog
 *
 * Only an unconnected SOCK_STREAM socket whose sk state is TCP_CLOSE or
 * TCP_LISTEN is accepted.  A socket that is already listening only has its
 * backlog updated.
 *
 * Returns 0 on success, -EINVAL for a socket in the wrong state/type, or the
 * error from inet_csk_listen_start().
 */
int inet_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
		goto out;

	if (old_state != TCP_LISTEN) {
		/* This call sets the state of sk to TCP_LISTEN. */
		err = inet_csk_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

3、通訊端的串連和接受串連

3.1、申請串連

/*
 * connect() system call.
 *
 * @fd:        file descriptor of the socket
 * @uservaddr: user-space pointer to the peer address
 * @addrlen:   length of that address
 *
 * Looks up the socket, copies the address into kernel space, runs the
 * security hook and calls the socket's ops->connect(), passing along the
 * file's f_flags.  Returns the error of the first failing step, otherwise
 * the result of ops->connect().
 */
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
		int, addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;
	err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
	if (err < 0)
		goto out_put;

	err =
	    security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
	if (err)
		goto out_put;

	err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
				 sock->file->f_flags);
out_put:
	/* Balance the file reference taken by the lookup. */
	fput_light(sock->file, fput_needed);
out:
	return err;
}

還是先調用sockfd_lookup_light函數獲得socket指標,然後將使用者空間地址移到核心空間,然後調用函數inet_stream_connect函數。

/*
 * connect() implementation for INET stream sockets.
 *
 * @sock:     socket to connect
 * @uaddr:    kernel-space copy of the peer address
 * @addr_len: length of @uaddr
 * @flags:    file flags; O_NONBLOCK is used when computing the send timeout
 *
 * Dispatches on sock->state: an unconnected socket starts a connection via
 * sk_prot->connect() and moves to SS_CONNECTING; the other states only pick
 * the error code.  While sk is in SYN_SENT/SYN_RECV the function either
 * returns the pending error (zero timeout, or inet_wait_for_connect()
 * returned 0) or goes on after the wait.
 *
 * Returns 0 once the socket is SS_CONNECTED, otherwise a negative errno.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;

	if (addr_len < sizeof(uaddr->sa_family))
		return -EINVAL;

	lock_sock(sk);

	......................	/* code elided in the article */

	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	case SS_CONNECTED:
		err = -EISCONN;
		goto out;
	case SS_CONNECTING:
		err = -EALREADY;
		/* Fall out of switch with err, set for this state */
		break;
	case SS_UNCONNECTED:
		err = -EISCONN;
		if (sk->sk_state != TCP_CLOSE)
			goto out;

		err = sk->sk_prot->connect(sk, uaddr, addr_len);
		if (err < 0)
			goto out;

		sock->state = SS_CONNECTING;

		/* Error reported if the code below bails out before completion. */
		err = -EINPROGRESS;
		break;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		/* Error code is set above */
		if (!timeo || !inet_wait_for_connect(sk, timeo))
			goto out;

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
	}

	/* Connection was closed by RST, timeout, ICMP error
	 * or another process disconnected us.
	 */
	if (sk->sk_state == TCP_CLOSE)
		goto sock_error;

	sock->state = SS_CONNECTED;
	err = 0;
out:
	release_sock(sk);
	return err;

sock_error:
	/* Use the socket's pending error if there is one, else -ECONNABORTED. */
	err = sock_error(sk) ? : -ECONNABORTED;
	sock->state = SS_UNCONNECTED;
	if (sk->sk_prot->disconnect(sk, flags))
		sock->state = SS_DISCONNECTING;
	goto out;
}

調用tcp_v4_connect函數(通過sk->sk_prot->connect)之後,將sock的狀態置為SS_CONNECTING。

/*
 * Start an outgoing TCP/IPv4 connection.
 *
 * @sk:       socket to connect
 * @uaddr:    destination address (treated as struct sockaddr_in)
 * @addr_len: length of @uaddr
 *
 * Validates the address, obtains a route to the destination, fills in the
 * socket's source/destination fields, moves sk to TCP_SYN_SENT and calls
 * tcp_connect() to send the SYN.
 *
 * Returns 0 on success or a negative errno.  On the "failure" path the
 * socket is put back into TCP_CLOSE and the destination port is cleared.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	/* Validity checks. */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Record the next-hop address and the destination address. */
	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		/* With the srr option set, the next hop comes from the IP options. */
		nexthop = inet_opt->opt.faddr;
	}

	/* Local port and destination port. */
	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	/* Obtain the route for this connection (the article: "maintain the routing table"). */
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	/* Multicast or broadcast routes are rejected. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	/* Set the destination port and destination address in the socket. */
	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Set the state of sk to TCP_SYN_SENT. */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		/* Cleared so the failure path calls ip_rt_put() with NULL, not an error pointer. */
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	/*
	 * Build the SYN segment and send it.  Per the article this function is
	 * fairly involved: it initialises the TCP connection and transmits.
	 */
	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/* Failure handling. */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}

3.2、接受串連

系統調用函數sys_accept實現如下:

/*
 * accept() system call: a thin wrapper that forwards to sys_accept4() with
 * flags set to 0.
 *
 * @fd:             file descriptor of the listening socket
 * @upeer_sockaddr: user-space buffer for the peer address
 * @upeer_addrlen:  user-space pointer to the length of that buffer
 */
SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen)
{
	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}

調用系統調用sys_accept4

/*
 * accept4() system call.
 *
 * @fd:             file descriptor of the listening socket
 * @upeer_sockaddr: user-space buffer for the peer address; skipped if NULL
 * @upeer_addrlen:  user-space pointer to the length of that buffer
 * @flags:          flags passed on to sock_alloc_file()
 *
 * Allocates a new socket and a file for it, lets the listening socket's
 * ops->accept() attach the accepted connection, optionally copies the peer
 * address to user space, and installs the new descriptor.
 *
 * Returns the new file descriptor on success or a negative errno.
 */
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen, int, flags)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len, newfd, fput_needed;
	struct sockaddr_storage address;

	.......................	/* code elided in the article */

	/* Get the socket that belongs to fd. */
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	err = -ENFILE;
	/* Create a brand-new socket. */
	newsock = sock_alloc();
	if (!newsock)
		goto out_put;

	/* Copy some of the socket's attributes. */
	newsock->type = sock->type;
	newsock->ops = sock->ops;

	__module_get(newsock->ops->owner);

	/* Allocate a file structure for the new socket and get a new file descriptor. */
	newfd = sock_alloc_file(newsock, &newfile, flags);
	if (unlikely(newfd < 0)) {
		err = newfd;
		sock_release(newsock);
		goto out_put;
	}

	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;

	/* Call inet_accept (per the article) to accept the connection. */
	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
	if (err < 0)
		goto out_fd;

	if (upeer_sockaddr) {
		/* Move the address information from kernel space to user space. */
		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
					  &len, 2) < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user((struct sockaddr *)&address,
					len, upeer_sockaddr, upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	/* File flags are not inherited via accept() unlike another OSes. */

	/* Install the file descriptor. */
	fd_install(newfd, newfile);
	err = newfd;

out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
out_fd:
	/* Undo the file and descriptor allocation, then drop the listener's reference. */
	fput(newfile);
	put_unused_fd(newfd);
	goto out_put;
}

該函數建立一個新的通訊端,設定用戶端串連並喚醒用戶端並返回一個新的檔案描述符fd。

下面是inet_accept函數的實現

/*
 * accept() implementation of the INET protocol family.
 *
 * @sock:    listening socket
 * @newsock: freshly allocated socket that receives the accepted connection
 * @flags:   flags forwarded to the protocol's accept handler
 *
 * Returns 0 on success; otherwise the error left in err by the protocol's
 * accept handler (err starts out as -EINVAL).
 */
int inet_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk1 = sock->sk;
	int err = -EINVAL;
	/*
	 * Per the article this calls inet_csk_accept(), which takes an already
	 * connected socket off the icsk_accept_queue queue.
	 */
	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);

	if (!sk2)
		goto do_err;

	lock_sock(sk2);

	sock_rps_record_flow(sk2);
	/* Warn if the accepted sock is in none of these states. */
	WARN_ON(!((1 << sk2->sk_state) &
		  (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));

	sock_graft(sk2, newsock);

	/* Set the socket state. */
	newsock->state = SS_CONNECTED;
	err = 0;
	release_sock(sk2);
do_err:
	return err;
}

4、關閉串連

關閉一個socket串連,系統調用sys_shutdown

/*
 * shutdown() system call.
 *
 * @fd:  file descriptor of the socket
 * @how: shutdown mode, passed through to the security hook and ops->shutdown()
 *
 * Returns the lookup error, the security hook's error, or the result of the
 * socket's ops->shutdown().
 */
SYSCALL_DEFINE2(shutdown, int, fd, int, how)
{
	int err, fput_needed;
	struct socket *sock;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock != NULL) {
		err = security_socket_shutdown(sock, how);
		if (!err)
			err = sock->ops->shutdown(sock, how);
		/* Balance the file reference taken by the lookup. */
		fput_light(sock->file, fput_needed);
	}
	return err;
}

函數最後調用inet_shutdown關閉通訊端

/*
 * shutdown() implementation of the INET protocol family.
 *
 * @sock: socket to shut down
 * @how:  shutdown mask; OR-ed into sk->sk_shutdown and tested against
 *        RCV_SHUTDOWN (any preprocessing of it is in the elided code)
 *
 * Returns 0, -ENOTCONN when sk is in TCP_CLOSE, or the result of
 * sk_prot->disconnect() for the TCP_LISTEN / TCP_SYN_SENT cases.
 */
int inet_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int err = 0;

	.................	/* code elided in the article */

	lock_sock(sk);
	/* A socket still marked SS_CONNECTING is reclassified from the sk state. */
	if (sock->state == SS_CONNECTING) {
		if ((1 << sk->sk_state) &
		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
			sock->state = SS_DISCONNECTING;
		else
			sock->state = SS_CONNECTED;
	}

	switch (sk->sk_state) {
	case TCP_CLOSE:
		err = -ENOTCONN;
		/* No break here: execution continues into the default branch. */
	default:
		sk->sk_shutdown |= how;
		/* Per the article this calls tcp_shutdown to force the connection closed. */
		if (sk->sk_prot->shutdown)
			sk->sk_prot->shutdown(sk, how);
		break;

	/* Remaining two branches are temporary solution for missing
	 * close() in multithreaded environment. It is _not_ a good idea,
	 * but we have no choice until close() is repaired at VFS level.
	 */
	case TCP_LISTEN:
		if (!(how & RCV_SHUTDOWN))
			break;
		/* Fall through */
	case TCP_SYN_SENT:
		/* Per the article this calls tcp_disconnect to break the connection. */
		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
		/* Set the socket state. */
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		break;
	}

	sk->sk_state_change(sk);
	release_sock(sk);
	return err;
}

後面會詳細分析TCP協議的發送和接收過程。

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.