[Network programming] socket creation process

Source: Internet
Author: User
Today we analyze the socket creation process. How a user-mode socket() call enters kernel mode and reaches sys_socket through the system call mechanism is not analyzed here. Let's look directly at the kernel-mode socket entry point.

Syscall_define3 (socket, Int, family, Int, type, Int, Protocol)
{
Int retval;
Struct socket * sock;
Int flags;

/* Check the SOCK _ * constants for consistency .*/
Build_bug_on (sock_cloexec! = O_cloexec );
Build_bug_on (sock_max | sock_type_mask )! = Sock_type_mask );
Build_bug_on (sock_cloexec & sock_type_mask );
Build_bug_on (sock_nonblock & sock_type_mask );

Flags = type &~ Sock_type_mask;
If (flags &~ (Sock_cloexec | sock_nonblock ))
Return-einval;
Type & = sock_type_mask;

If (sock_nonblock! = O_nonblock & (flags & sock_nonblock ))
Flags = (flags &~ Sock_nonblock) | o_nonblock; the preceding parameters are checked and configured.

Retval = sock_create (family, type, protocol, & sock); Create sock
If (retval <0)
Goto out;

Retval = sock_map_fd (sock, flags & (o_cloexec | o_nonblock ));
Associate the socket with the file system by mapping it to a file descriptor
If (retval <0)
Goto out_release;

Out:
/* It may be already another descriptor 8) Not kernel problem .*/
Return retval;

Out_release:
Sock_release (sock );
Return retval;
}

Int sock_create (INT family, int type, int protocol, struct socket ** res)
{
Return _ sock_create (current-> nsproxy-> net_ns, family, type, protocol, res, 0 );
}

Static int _ sock_create (struct net * Net, int family, int type, int protocol,
Struct socket ** res, int Kern)-> sock = sock_alloc ();-> pF = rcu_dereference (net_families [Family]); obtain the protocol family operation table-> err = PF-> Create (net, Sock, Protocol); call the create function of the protocol family
Static struct socket * sock_alloc (void)
{
Struct inode * inode;
Struct socket * sock;

Inode = new_inode (sock_mnt-> mnt_sb); Allocate an inode in the socket file system (the one mounted at sock_mnt)
If (! Inode)
Return NULL;

Sock = socket_ I (inode); obtain the socket structure from the inode Structure

Kmemcheck_annotate_bitfield (sock, type );
Inode-> I _mode = s_ifsock | s_irwxugo;
Inode-> I _uid = current_fsuid ();
Inode-> I _gid = current_fsgid ();

Percpu_add (sockets_in_use, 1 );
Return sock;
}

-> PF = rcu_dereference (net_families [Family]); The entries of the net_families array (the protocol family operation table) are registered through sock_register.
Int sock_register (const struct net_proto_family * OPS)
{
Int err;

If (OPS-> family> = nproto ){
Printk (kern_crit "protocol % d> = nproto (% d) \ n", OPS-> family,
Nproto );
Return-enobufs;
}

Spin_lock (& net_family_lock );
If (net_families [OPS-> family])
Err =-eexist;
Else {
Net_families [OPS-> family] = OPS;
Err = 0;
}
Spin_unlock (& net_family_lock );

Printk (kern_info "Net: Registered protocol family % d \ n", OPS-> family );
Return err;
} Suppose we call socket (pf_inet, sock_stream, 0). In inet_init (), the call (void) sock_register (& inet_family_ops ); registers the following structure: static struct net_proto_family inet_family_ops = {
. Family = pf_inet,
. Create = inet_create,
. Owner = this_module,
}; Let's take a look at the inet_create function.

Static int inet_create (struct net * Net, struct socket * sock, int Protocol)
{
Struct sock * SK;
Struct inet_protosw * answer;
Struct inet_sock * inet;
Struct proto * answer_prot;
Unsigned char answer_flags;
Char answer_no_check;
Int try_loading_module = 0;
Int err;

If (unlikely (! Inet_ehash_secret) Hash secret handling: build the secret if it has not been set yet
If (sock-> type! = Sock_raw & sock-> type! = Sock_dgram)
Build_ehash_secret ();

Sock-> state = ss_unconnected; set the socket status to unconnected

/* Look for the requested type/protocol pair .*/
Lookup_protocol:
Err =-esocktnosupport;
Rcu_read_lock ();
List_for_each_entry_rcu (answer, & inetsw [sock-> type], list) {Check whether a handler for the requested type/protocol pair has been registered in the kernel

The entries of the inetsw table are registered by the function void inet_register_protosw (struct inet_protosw * P).
In inet_init (), each element of inetsw_array is registered in a loop: for (q = inetsw_array; q <& inetsw_array [inetsw_array_len]; ++ q)

Inet_register_protosw (Q );

Tracing further, inetsw_array is defined as follows: Static struct inet_protosw inetsw_array [] =

{
{
. Type = sock_stream, stream socket
. Protocol = ipproto_tcp,
. Prot = & tcp_prot,
. Ops = & inet_stream_ops,
. Capability =-1,
. No_check = 0,
. Flags = inet_protosw_permanent |
Inet_protosw_icsk,
},

{
. Type = sock_dgram, datagram socket
. Protocol = ipproto_udp,
. Prot = & udp_prot,
. Ops = & inet_dgram_ops,
. Capability =-1,
. No_check = udp_csum_default,
. Flags = inet_protosw_permanent,
},

{
. Type = sock_raw, raw socket
. Protocol = ipproto_ip,/* wild card */
. Prot = & raw_prot,
. Ops = & inet_sockraw_ops,
. Capability = cap_net_raw,
. No_check = udp_csum_default,
. Flags = inet_protosw_reuse,
}
}

Sock-> Ops = answer-> OPS; for a stream (TCP) socket this is . Ops = & inet_stream_ops

...

Err =-enobufs;
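SK = sk_alloc (net, pf_inet, gfp_kernel, answer_prot); Create a sock structure. Taking TCP as an example, answer_prot is tcp_prot (the . Prot member of the matching inetsw entry, not inet_stream_ops, which is the . Ops member), and it is stored in the sock's sk_prot: SK-> sk_prot = Sk-> sk_prot_creator = prot;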
If (Sk = NULL)
Goto out;

...

Sock_init_data (sock, SK); initialize the sock and socket structures, and associate the two

SK-> sk_destruct = inet_sock_destruct;
SK-> sk_protocol = protocol;
SK-> sk_backlog_rcv = Sk-> sk_prot-> backlog_rcv;

INet-> uc_ttl =-1;
INet-> mc_loop = 1;
INet-> mc_ttl = 1;
INet-> mc_all = 1;
INet-> mc_index = 0;
INet-> mc_list = NULL;

Sk_refcnt_debug_inc (SK );

If (iNet-> num ){
/* It assumes that any protocol which allows
* The user to assign a number at socket
* Creation time automatically
* Shares.
*/
INet-> sport = htons (iNet-> num );
/* Add to Protocol hash chains .*/
SK-> sk_prot-> Hash (SK );
}

If (SK-> sk_prot-> init ){
Err = Sk-> sk_prot-> Init (SK );
If (ERR)
Sk_common_release (SK );
}
Out:
Return err;
Out_rcu_unlock:
Rcu_read_unlock ();
Goto out;
}

Const struct proto_ops inet_stream_ops = {The set of socket-level operation functions used for TCP (stream) sockets, including listen, bind, accept, poll, and others
. Family = pf_inet,
. Owner = this_module,
. Release = inet_release,
. Bind = inet_bind,
. Connect = inet_stream_connect,
. Socketpair = sock_no_socketpair,
. Accept = inet_accept,
. Getname = inet_getname,
. Poll = tcp_poll,
. IOCTL = inet_ioctl,
. Listen = inet_listen,
. Shutdown = inet_shutdown,
. Setsockopt = sock_common_setsockopt,
. Getsockopt = sock_common_getsockopt,
. Sendmsg = tcp_sendmsg,
. Recvmsg = sock_common_recvmsg,
. MMAP = sock_no_mmap,
. Sendpage = tcp_sendpage,
. Splice_read = tcp_splice_read,
# Ifdef config_compat
. Compat_setsockopt = compat_sock_common_setsockopt,
. Compat_getsockopt = compat_sock_common_getsockopt,
# Endif
}; Struct proto tcp_prot = {The protocol-layer handler functions registered by the TCP protocol
. Name = "TCP ",
. Owner = this_module,
. Close = tcp_close,
. Connect = tcp_v4_connect,
. Disconnect = tcp_disconnect,
. Accept = inet_csk_accept,
. IOCTL = tcp_ioctl,
. Init = tcp_v4_init_sock,
. Destroy = tcp_v4_destroy_sock,
. Shutdown = tcp_shutdown,
. Setsockopt = tcp_setsockopt,
. Getsockopt = tcp_getsockopt,
. Recvmsg = tcp_recvmsg,
. Backlog_rcv = tcp_v4_do_rcv,
. Hash = inet_hash,
. Unhash = inet_unhash,
. Get_port = inet_csk_get_port,
. Enter_memory_pressure = tcp_enter_memory_pressure,
. Sockets_allocated = & tcp_sockets_allocated,
. Orphan_count = & tcp_orphan_count,
. Memory_allocated = & tcp_memory_allocated,
. Memory_pressure = & tcp_memory_pressure,
. Sysctl_mem = sysctl_tcp_mem,
. Sysctl_wmem = sysctl_tcp_wmem,
. Sysctl_rmem = sysctl_tcp_rmem,
. Max_header = max_tcp_header,
. Obj_size = sizeof (struct tcp_sock ),
. Slab_flags = slab_destroy_by_rcu,
. Twsk_prot = & tcp_timewait_sock_ops,
. Rsk_prot = & tcp_request_sock_ops,
. H. hashinfo = & tcp_hashinfo,
# Ifdef config_compat
. Compat_setsockopt = compat_tcp_setsockopt,
. Compat_getsockopt = compat_tcp_getsockopt,
# Endif
}; Sock_create (family, type, protocol, & sock); this is basically the whole process of creating a socket: an inode is allocated from the socket file system and the socket structure is obtained from it; a new sock structure is allocated and the two are associated; and, based on the parameters passed to socket (family, type, protocol), that is, the protocol family and the socket type (stream socket, datagram socket, and so on), the corresponding sets of handler functions are attached. Now let's look at what sock_map_fd (sock, flags & (o_cloexec | o_nonblock )); does. Int sock_map_fd (struct socket * sock, int flags)
{
Struct file * newfile;
Int FD = sock_alloc_fd (& newfile, flags); allocate a file descriptor and a file structure for the socket.

If (likely (FD> = 0 )){
Int err = sock_attach_fd (sock, newfile, flags); associate the socket with the file structure

If (unlikely (ERR <0 )){
Put_filp (newfile );
Put_unused_fd (FD );
Return err;
}
Fd_install (FD, newfile); bind the file descriptor to the file structure}
Return FD;
}

Static int sock_attach_fd (struct socket * sock, struct file * file, int flags)
{
Struct dentry * dentry;
Struct qstr name = {. Name = ""};

Dentry = d_alloc (sock_mnt-> mnt_sb-> s_root, & name); Allocate a directory entry (dentry)
If (unlikely (! Dentry ))
Return-enomem;

Dentry-> d_op = & sockfs_dentry_operations;
/*
* We dont want to push this dentry into global dentry hash table.
* We pretend dentry is already hashed, by unsetting dcache_unhashed
* This permits a working/proc/$ PID/FD/XXX on sockets
*/
Dentry-> d_flags & = ~ Dcache_unhashed;
D_instantiate (dentry, sock_inode (sock); Associate the directory entry (dentry) with the socket's inode

Sock-> file = file;
Init_file (file, sock_mnt, dentry, fmode_read | fmode_write,
& Socket_file_ops); initialize the members of the file structure, setting file-> f_op to socket_file_ops, the set of file operations for socket files.
Sock_inode (sock)-> I _fop = & socket_file_ops;
File-> f_flags = o_rdwr | (flags & o_nonblock );
File-> f_pos = 0;
File-> private_data = sock; the file's private_data now points to the socket.

Return 0;
} To summarize, here is a figure taken from the book "Linux Kernel Source Code Scenario Analysis". The main data structures are related as shown.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.