Linux poll() Implementation Analysis

Source: Internet
Author: User

/*
 * One element of the array handed to poll(): it names a descriptor, the
 * events the caller is interested in, and the events reported back.
 */
struct pollfd {
	int fd;		/* descriptor being polled */
	short events;	/* events the process cares about */
	short revents;	/* events returned to the caller */
};

Asmlinkage long sys_poll (struct pollfd _ User * ufds, unsigned int NFDs,
Long timeout_msecs)
{
S64 timeout_jiffies;

// Timeout Processing
If (timeout_msecs> 0 ){
# If Hz> 1000
/* We can only overflow if Hz> 1000 */
If (timeout_msecs/1000> (s64) 0x7fffffffffffffffull/(s64) Hz)
Timeout_jiffies =-1;
Else
# Endif
Timeout_jiffies = msecs_to_jiffies (timeout_msecs );
} Else {
/* Infinite (<0) or no (0) Timeout */
Timeout_jiffies = timeout_msecs;
}

// Actual processing function
Return do_sys_poll (ufds, NFDs, & timeout_jiffies );
}

Struct poll_list {
Struct poll_list * next;
Int Len;
Struct pollfd entries [0];
};

Int do_sys_poll (struct pollfd _ User * ufds, unsigned int NFDs, s64 * timeout)
{
Struct poll_wqueues table;
Int fdcount, err;
Unsigned int I;
Struct poll_list * head;
Struct poll_list * walk;
/* Allocate small arguments on the stack to save memory and be
Faster-use long to make sure the buffer is aligned properly
On 64 bit archs to avoid unaligned access */
Long stack_pps [poll_stack_alloc/sizeof (long)]; // stack allocation will be faster
Struct poll_list * stack_pp = NULL;

// Check whether the number of descriptors exceeds the System Limit
/* Do a sanity check on NFDs ...*/
If (NFDs> current-> signal-> rlim [rlimit_nofile]. rlim_cur)
Return-einval;

// The first step is initialization. It mainly initializes the function pointer poll_table.
Poll_initwait (& table );

Head = NULL;
Walk = NULL;
I = NFDs;
Err =-enomem;
// The task of this loop is to copy the structure information of multiple pollfd files from the user to the kernel,
// Because the number of possible structures may exceed the storage range of one page of memory, a loop is used to complete the process,
// The number of memories that can be loaded for one page each copy. And then chain them with chain tables.
While (I! = 0 ){
Struct poll_list * PP;
Int num, size;
If (stack_pp = NULL)
Num = n_stack_pps;
Else
Num = pollfd_per_page; // ensure that the space allocated by kmalloc does not exceed one page.
If (Num> I)
Num = I;
Size = sizeof (struct poll_list) + sizeof (struct pollfd) * num;

// If the number of descriptors is relatively small, or when it is relatively large, the stack will be used for storage for the first time
If (! Stack_pp)
Stack_pp = pp = (struct poll_list *) stack_pps;
Else {
Pp = kmalloc (size, gfp_kernel );
If (! Pp)
Goto out_fds;
}
PP-> next = NULL;
PP-> Len = num;
If (Head = NULL)
Head = pp;
Else
Walk-> next = pp;

Walk = pp;
If (copy_from_user (PP-> entries, ufds + NFDs-I,
Sizeof (struct pollfd) * num )){
Err =-efault;
Goto out_fds;
}
I-= PP-> Len;
}

// Real poll operation. The returned result is in the head.
Fdcount = do_poll (NFDs, Head, & table, timeout );

// Double loop: copy the event back to the user space
/* OK, now copy the revents fields back to user space .*/
Walk = head;
Err =-efault;
While (walk! = NULL ){
Struct pollfd * FDS = walk-> entries;
Int J;

For (j = 0; j <walk-> Len; j ++, ufds ++ ){
If (_ put_user (FDS [J]. revents, & ufds-> revents ))
Goto out_fds;
}
Walk = walk-> next;
}
Err = fdcount;
If (! Fdcount & signal_pending (current ))
Err =-eintr;

// Release space
Out_fds:
Walk = head;
While (walk! = NULL ){
Struct poll_list * PP = walk-> next;
If (walk! = Stack_pp)
Kfree (walk );
Walk = pp;
}
Poll_freewait (& table );
Return err;
}

// This function adds the current process to the waiting queue, which is provided by the driver, file system, or network protocol stack.
// This function is indirectly called by calling poll_wait () in the drive file-> poll.
/* Add a new entry */
Static void _ pollwait (struct file * filp, wait_queue_head_t * wait_address,
Poll_table * P)
{
Struct poll_table_entry * entry = poll_get_entry (P );
If (! Entry)
Return;
Get_file (filp );
Entry-> filp = filp;
Entry-> wait_address = wait_address;
Init_waitqueue_entry (& Entry-> wait, current );
Add_wait_queue (wait_address, & Entry-> wait );
}

Void poll_initwait (struct poll_wqueues * pwq)
{
// Initialize it to _ pollwait () in poll (). Note that it will be different in epoll
Init_poll_funcptr (& pwq-> PT, _ pollwait );
Pwq-> error = 0;
Pwq-> table = NULL;
Pwq-> inline_index = 0;
}

========================================================== ===

/*
 * Poll every descriptor in @list, sleeping and retrying until at least one
 * is ready, the timeout runs out, a signal is pending, or wait->error is set.
 *
 * @nfds:    number of descriptors (not used in this function body)
 * @list:    chained chunks of pollfd entries to scan
 * @wait:    wait-queue bookkeeping; its poll_table is used on the first scan
 * @timeout: in/out timeout in jiffies; 0 = do not wait, < 0 = wait forever
 *
 * Returns the number of entries for which do_pollfd() returned non-zero,
 * or the value of wait->error if that is non-zero.
 */
static int do_poll(unsigned int nfds, struct poll_list *list,
		   struct poll_wqueues *wait, s64 *timeout)
{
	int count = 0;
	poll_table *pt = &wait->pt;

	/* Optimise the no-wait case */
	if (!(*timeout))	/* the caller asked not to wait */
		pt = NULL;

	for (;;) {
		struct poll_list *walk;
		long __timeout;

		/* Again a nested loop: examine the events of every descriptor */
		set_current_state(TASK_INTERRUPTIBLE);
		for (walk = list; walk != NULL; walk = walk->next) {
			struct pollfd *pfd, *pfd_end;

			pfd = walk->entries;
			pfd_end = pfd + walk->len;
			for (; pfd != pfd_end; pfd++) {
				/*
				 * Fish for events. If we found one, record it
				 * and kill the poll_table, so we don't
				 * needlessly register any other waiters after
				 * this. They'll get immediately deregistered
				 * when we break out and return.
				 */
				if (do_pollfd(pfd, pt)) {	/* handle one descriptor */
					count++;
					pt = NULL;
				}
			}
		}

		/*
		 * All waiters have already been registered, so don't provide
		 * a poll_table to them on the next loop iteration.
		 */
		pt = NULL;
		if (count || !*timeout || signal_pending(current))
			break;
		count = wait->error;
		if (count)
			break;

		/* Timeout processing */
		if (*timeout < 0) {
			/* Wait indefinitely */
			__timeout = MAX_SCHEDULE_TIMEOUT;
		} else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
			/*
			 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
			 * a loop
			 */
			__timeout = MAX_SCHEDULE_TIMEOUT - 1;
			*timeout -= __timeout;
		} else {
			__timeout = *timeout;
			*timeout = 0;
		}

		/* Give up the CPU; the process sleeps here */
		__timeout = schedule_timeout(__timeout);
		/* Woken up again: add back whatever schedule_timeout() returned */
		if (*timeout >= 0)
			*timeout += __timeout;
	}
	__set_current_state(TASK_RUNNING);
	return count;
}

/*
 * Fish for pollable events on the pollfd->fd file descriptor. We're only
 * interested in events matching the pollfd->events mask, and the result
 * matching that mask is both recorded in pollfd->revents and returned.
 * The pwait poll_table will be used by the fd-provided poll handler for
 * waiting, if non-NULL.
 *
 * A negative fd yields a mask of 0; an fd for which fget_light() returns
 * NULL yields POLLNVAL.
 */
static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
{
	unsigned int mask;
	int fd;

	mask = 0;
	fd = pollfd->fd;
	if (fd >= 0) {
		int fput_needed;
		struct file *file;

		file = fget_light(fd, &fput_needed);
		mask = POLLNVAL;
		if (file != NULL) {
			mask = DEFAULT_POLLMASK;

			/*
			 * Call the driver's or file system's poll method.
			 * pwait is passed straight through to it; poll_wait()
			 * only queues the process when that pointer is non-NULL.
			 */
			if (file->f_op && file->f_op->poll)
				mask = file->f_op->poll(file, pwait);
			/* Mask out unneeded events. */
			mask &= pollfd->events | POLLERR | POLLHUP;
			fput_light(file, fput_needed);
		}
	}
	pollfd->revents = mask;	/* report the result in the entry itself */

	return mask;		/* non-zero if any event of interest is pending */
}

======================================
Prototype of a poll() implementation as provided by a driver or file system:
/*
 * Skeleton of a driver / file-system poll method. The "..." comments mark
 * code the article leaves out; how `dev` is obtained is not shown there.
 *
 * Registers on the device's wait queue through poll_wait() and returns a
 * mask of the events that are currently pending.
 */
unsigned int test_poll(struct file *filep, poll_table *wait)
{
	unsigned int mask = 0;

	/* ... */
	poll_wait(filep, &dev->wait_queue_head, wait);
	/* ... */

	if (dev->readable)
		mask |= POLLIN | POLLRDNORM;

	if (dev->writable)
		mask |= POLLOUT | POLLWRNORM;

	/* ... */

	return mask;
}

/*
 * Hand the wait queue to the poll table's queueing callback.
 *
 * Does nothing when either @p or @wait_address is NULL, so a poll method
 * can call it unconditionally.
 */
static inline void poll_wait(struct file *filp, wait_queue_head_t *wait_address,
			     poll_table *p)
{
	if (p && wait_address)
		p->qproc(filp, wait_address, p);	/* for poll(): __pollwait(), installed by poll_initwait() */
}

 

 

 

From: http://dev.firnow.com/course/6_system/linux/Linuxjs/2008929/146653.html

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.