Linux select and poll implementation mechanism and instance analysis

By combining select at the application layer with the driver's poll method, file operations can block until data is ready. How exactly is this implemented?

Select interface:
int select(int nfds, fd_set *readset, fd_set *writeset,
           fd_set *exceptset, struct timeval *timeout);

Where:

nfds
The number of file descriptors to check. It must be one greater than the largest descriptor in any of the three sets, not the total number of descriptors being watched.
readset
The set of file descriptors checked for readability.
writeset
The set of file descriptors checked for writability.
exceptset
The set of file descriptors checked for exceptional conditions. (Note: errors do not count as exceptional conditions.)
timeout
A NULL pointer means wait indefinitely; otherwise it points to a struct timeval giving the maximum time to wait. (If both tv_sec and tv_usec are 0, the file descriptors are still checked, but the call returns immediately without blocking.)

Instance:

JNIEXPORT jint JNICALL nativeTtySelect(JNIEnv *env, jclass jclazz, int fd)
{
	int select_ret = 0;
	fd_set rfds;
	struct timeval tv;

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);
	tv.tv_sec = 1;	/* wait at most 1 second */
	tv.tv_usec = 0;

	/* block until fd becomes readable or the timeout expires */
	select_ret = select(fd + 1, &rfds, NULL, NULL, &tv);

	return select_ret;
}

After select is called, the kernel eventually reaches do_select, which is where the blocking happens:
int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
	ktime_t expire, *to = NULL;
	struct poll_wqueues table;
	poll_table *wait;
	int retval, i, timed_out = 0;
	unsigned long slack = 0;

	rcu_read_lock();
	retval = max_select_fd(n, fds);
	rcu_read_unlock();

	if (retval < 0)
		return retval;
	n = retval;

	poll_initwait(&table);
	wait = &table.pt;
	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
		wait->_qproc = NULL;
		timed_out = 1;
	}

	if (end_time && !timed_out)
		slack = select_estimate_accuracy(end_time);

	retval = 0;
	for (;;) {
		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;

		inp = fds->in; outp = fds->out; exp = fds->ex;
		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
			unsigned long res_in = 0, res_out = 0, res_ex = 0;
			const struct file_operations *f_op = NULL;
			struct file *file = NULL;

			in = *inp++; out = *outp++; ex = *exp++;
			all_bits = in | out | ex;
			if (all_bits == 0) {
				i += __NFDBITS;
				continue;
			}

			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
				int fput_needed;
				if (i >= n)
					break;
				if (!(bit & all_bits))
					continue;
				file = fget_light(i, &fput_needed);
				if (file) {
					f_op = file->f_op;
					mask = DEFAULT_POLLMASK;
					if (f_op && f_op->poll) {
						wait_key_set(wait, in, out, bit);
						/* call the driver's poll method; via poll_wait()
						 * it adds this process to the driver's wait queue */
						mask = (*f_op->poll)(file, wait);
					}
					fput_light(file, fput_needed);
					if ((mask & POLLIN_SET) && (in & bit)) {
						res_in |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLOUT_SET) && (out & bit)) {
						res_out |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLEX_SET) && (ex & bit)) {
						res_ex |= bit;
						retval++;
						wait->_qproc = NULL;
					}
				}
			}
			if (res_in)
				*rinp = res_in;
			if (res_out)
				*routp = res_out;
			if (res_ex)
				*rexp = res_ex;
			cond_resched();	/* give other runnable tasks a chance */
		}
		wait->_qproc = NULL;
		if (retval || timed_out || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec_to_ktime(*end_time);
			to = &expire;
		}

		/* sleep here and wait to be woken up -- where does the
		 * wakeup come from? That is analysed below. */
		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
					   to, slack))
			timed_out = 1;
	}

	poll_freewait(&table);

	return retval;
}
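In the loop above, wait->_qproc points at the kernel's __pollwait() callback, which is what actually links the current process onto the driver's wait queue. poll_wait() itself is only a thin wrapper around that callback; its definition is roughly the following (a sketch based on include/linux/poll.h, quoted from memory and slightly version dependent, so treat it as illustrative):

/* roughly what poll_wait() does: invoke the registration callback
 * (__pollwait for select/poll), which puts the caller on wait_address */
static inline void poll_wait(struct file *filp,
			     wait_queue_head_t *wait_address, poll_table *p)
{
	if (p && p->_qproc && wait_address)
		p->_qproc(filp, wait_address, p);
}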


sysfs poll in the kernel:

struct sysfs_open_dirent {
	atomic_t		refcnt;
	atomic_t		event;
	wait_queue_head_t	poll;
	struct list_head	buffers; /* goes through sysfs_buffer.list */
};

static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
{
	struct sysfs_buffer *buffer = filp->private_data;
	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
	struct sysfs_open_dirent *od = attr_sd->s_attr.open;

	/* need parent for the kobj, grab both */
	if (!sysfs_get_active(attr_sd))
		goto trigger;

	poll_wait(filp, &od->poll, wait);	/* add the caller to the poll wait queue */

	sysfs_put_active(attr_sd);

	if (buffer->event != atomic_read(&od->event))
		goto trigger;

	return DEFAULT_POLLMASK;

 trigger:
	buffer->needs_read_fill = 1;
	return DEFAULT_POLLMASK | POLLERR | POLLPRI;
}

Wakeup:

void sysfs_notify_dirent(struct sysfs_dirent *sd)
{
	struct sysfs_open_dirent *od;
	unsigned long flags;

	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);

	od = sd->s_attr.open;
	if (od) {
		atomic_inc(&od->event);
		wake_up_interruptible(&od->poll);	/* wake up the sleeping poller */
	}

	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
}
EXPORT_SYMBOL_GPL(sysfs_notify_dirent);

/* wake up the user-space poll on a sysfs attribute */

sysfs_notify(kobj, NULL, "xxxx");
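The same pairing applies to any character driver: the poll method registers the caller on a wait queue with poll_wait() and reports the current readiness, and the producer path (often an interrupt handler) calls wake_up_interruptible() on that same queue. A minimal sketch follows; the mydev_* names and the data_ready flag are hypothetical, not taken from any real driver:

#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

/* hypothetical driver state */
static DECLARE_WAIT_QUEUE_HEAD(mydev_wq);	/* queue the poller sleeps on */
static int mydev_data_ready;			/* set when new data arrives */

static unsigned int mydev_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask = 0;

	/* does not sleep: only registers the caller on mydev_wq */
	poll_wait(filp, &mydev_wq, wait);

	if (mydev_data_ready)
		mask |= POLLIN | POLLRDNORM;	/* readable right now */
	return mask;
}

/* producer side, e.g. called from the interrupt handler */
static void mydev_new_data(void)
{
	mydev_data_ready = 1;
	/* wakes do_select() out of poll_schedule_timeout() */
	wake_up_interruptible(&mydev_wq);
}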


Can the mechanism above really block the current process? Compare it with the commonly used wait_event_interruptible() below: after adding itself to the wait queue, the process calls schedule(), which hands the CPU to other processes.

The sleeping process is later woken by another context calling wake_up(), so the underlying mechanism is exactly the same; combining select with the driver's poll method is therefore sufficient to implement full process blocking.

#define __wait_event_interruptible(wq, condition, ret)			\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		/* set task state and add __wait to the wq queue list */\
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		if (!signal_pending(current)) {				\
			schedule();	/* sleep until woken up */	\
			continue;					\
		}							\
		ret = -ERESTARTSYS;					\
		break;							\
	}								\
	finish_wait(&wq, &__wait);					\
} while (0)
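A driver's blocking read typically uses this primitive directly rather than going through select. The following is a separate sketch of the read side of a similar hypothetical device (the mydev_* names and the one-byte buffer are purely illustrative):

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(mydev_read_wq);
static char mydev_byte;
static int mydev_have_data;

static ssize_t mydev_read(struct file *filp, char __user *buf,
			  size_t count, loff_t *ppos)
{
	/* sleep until the producer sets mydev_have_data and wakes us,
	 * or return -ERESTARTSYS if a signal interrupts the wait */
	if (wait_event_interruptible(mydev_read_wq, mydev_have_data))
		return -ERESTARTSYS;

	mydev_have_data = 0;
	if (copy_to_user(buf, &mydev_byte, 1))
		return -EFAULT;
	return 1;
}

/* producer side: store a byte and wake the sleeping reader */
static void mydev_push_byte(char c)
{
	mydev_byte = c;
	mydev_have_data = 1;
	wake_up_interruptible(&mydev_read_wq);
}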




Instance: how is a blocking read on a serial port implemented? The underlying analysis follows.

tty poll:
static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file,
			       poll_table *wait)
{
	unsigned int mask = 0;

	/* add the caller to the read and write wait queues */
	poll_wait(file, &tty->read_wait, wait);
	poll_wait(file, &tty->write_wait, wait);
	if (input_available_p(tty, TIME_CHAR(tty) ? 0 : MIN_CHAR(tty)))
		mask |= POLLIN | POLLRDNORM;
	if (tty->packet && tty->link->ctrl_status)
		mask |= POLLPRI | POLLIN | POLLRDNORM;
	if (test_bit(TTY_OTHER_CLOSED, &tty->flags))
		mask |= POLLHUP;
	if (tty_hung_up_p(file))
		mask |= POLLHUP;
	if (!(mask & (POLLHUP | POLLIN | POLLRDNORM))) {
		if (MIN_CHAR(tty) && !TIME_CHAR(tty))
			tty->minimum_to_wake = MIN_CHAR(tty);
		else
			tty->minimum_to_wake = 1;
	}
	if (tty->ops->write && !tty_is_writelocked(tty) &&
			tty_chars_in_buffer(tty) < WAKEUP_CHARS &&
			tty_write_room(tty) > 0)
		mask |= POLLOUT | POLLWRNORM;
	return mask;
}



Wakeup:
The reading process is woken through wake_up(&tty->read_wait). Each time data arrives, the interrupt handler calls tty_insert_flip_string() to copy it into the kernel tty buffer, and flush_to_ldisc() below is then scheduled to push that buffer up to the line discipline.

/**
 *	flush_to_ldisc
 *	@work: tty structure passed from work queue.
 *
 *	This routine is called out of the software interrupt to flush data
 *	from the buffer chain to the line discipline.
 *
 *	Locking: holds tty->buf.lock to guard buffer list. Drops the lock
 *	while invoking the line discipline receive_buf method. The
 *	receive_buf method is single threaded for each tty instance.
 */

static void flush_to_ldisc(struct work_struct *work)
{
	struct tty_struct *tty =
		container_of(work, struct tty_struct, buf.work);
	unsigned long flags;
	struct tty_ldisc *disc;

	disc = tty_ldisc_ref(tty);
	if (disc == NULL)	/*  !TTY_LDISC */
		return;

	spin_lock_irqsave(&tty->buf.lock, flags);

	if (!test_and_set_bit(TTY_FLUSHING, &tty->flags)) {
		struct tty_buffer *head;
		while ((head = tty->buf.head) != NULL) {
			int count;
			char *char_buf;
			unsigned char *flag_buf;

			count = head->commit - head->read;
			if (!count) {
				if (head->next == NULL)
					break;
				tty->buf.head = head->next;
				tty_buffer_free(tty, head);
				continue;
			}
			/* Ldisc or user is trying to flush the buffers
			   we are feeding to the ldisc, stop feeding the
			   line discipline as we want to empty the queue */
			if (test_bit(TTY_FLUSHPENDING, &tty->flags))
				break;
			if (!tty->receive_room)
				break;
			if (count > tty->receive_room)
				count = tty->receive_room;
			char_buf = head->char_buf_ptr + head->read;
			flag_buf = head->flag_buf_ptr + head->read;
			head->read += count;
			spin_unlock_irqrestore(&tty->buf.lock, flags);
			disc->ops->receive_buf(tty, char_buf,
					       flag_buf, count);
			spin_lock_irqsave(&tty->buf.lock, flags);
		}
		clear_bit(TTY_FLUSHING, &tty->flags);
	}

	/* We may have a deferred request to flush the input buffer,
	   if so pull the chain under the lock and empty the queue */
	if (test_bit(TTY_FLUSHPENDING, &tty->flags)) {
		__tty_buffer_flush(tty);
		clear_bit(TTY_FLUSHPENDING, &tty->flags);
		wake_up(&tty->read_wait);	/* wake up the blocked reader */
	}
	spin_unlock_irqrestore(&tty->buf.lock, flags);

	tty_ldisc_deref(disc);
}
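Putting both ends together: the user-space process blocks inside select() (do_select sleeps in poll_schedule_timeout) until the tty layer above wakes tty->read_wait, after which read() returns the received data without blocking. A minimal user-space sketch of a blocking serial read (the device path /dev/ttyS0 and the 1-second timeout are only examples):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/select.h>

int main(void)
{
	char buf[256];
	int fd = open("/dev/ttyS0", O_RDONLY | O_NOCTTY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	for (;;) {
		fd_set rfds;
		struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };

		FD_ZERO(&rfds);
		FD_SET(fd, &rfds);

		/* blocks in do_select() until n_tty_poll() reports POLLIN
		 * or the timeout expires */
		int ret = select(fd + 1, &rfds, NULL, NULL, &tv);
		if (ret < 0) {
			perror("select");
			break;
		}
		if (ret > 0 && FD_ISSET(fd, &rfds)) {
			ssize_t n = read(fd, buf, sizeof(buf));
			if (n <= 0)
				break;
			printf("read %zd bytes\n", n);
		}
		/* ret == 0: timeout, loop and wait again */
	}

	close(fd);
	return 0;
}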


