Implementation mechanism and example analysis of Linux Select and poll

Source: Internet
Author: User
Tags goto

Let's get straight to the point. By combining a driver's file operations with select and poll, blocking I/O can be implemented.

Select interface:
/* Prototype of select(): waits until one of the descriptors in the given
 * sets becomes ready, or until the timeout expires. */
int select(int nfds, fd_set *readset, fd_set *writeset,
           fd_set *exceptset, struct timeval *timeout);

where:

nfds
The range of file descriptors to check. The value should be one greater than the largest
descriptor number in the three fd_set groups, not the total number of actual file descriptors.
readset
The set of file descriptors to check for readability.
writeset
The set of file descriptors to check for writability.
exceptset
The set of file descriptors to check for exceptional conditions. (Note: an error is not an exceptional condition.)
timeout
A null pointer means wait indefinitely; otherwise it points to a timeval structure giving the
maximum time to wait. (If both tv_sec and tv_usec are 0, the state of the file descriptors
is not affected and the function returns without suspending.)

Example:
Jniexport jint jnicall nativettyselect (jnienv* env, jclass jclazz, int fd) {

int select_ret = 0;

Fd_set RfDs;

struct Timeval TV;

Fd_zero (&rfds);

Fd_set (FD, &rfds);

Tv.tv_sec = 1;

tv.tv_usec = 0;


Select_ret = Select (FD + 1, &rfds, NULL, NULL, &TV);

return select_ret;

}

When select is called as above, the call ends up in do_select in the kernel, which is where it blocks:
/*
 * Core of select(): poll every descriptor named in the three input bitmaps
 * and record the ready ones in the result bitmaps.
 *
 * n        - number of descriptors to scan (clamped by max_select_fd())
 * fds      - input bitmaps (in/out/ex) and result bitmaps (res_in/res_out/res_ex)
 * end_time - absolute expiry time, or NULL for no timeout; a zero
 *            tv_sec/tv_nsec means "do not sleep at all"
 *
 * Returns the number of ready bits recorded, a negative value from
 * max_select_fd(), or table.error if the poll table reported an error.
 * The outer loop repeats until something is ready, the timeout expires,
 * or a signal is pending.
 */
int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
{
	ktime_t expire, *to = NULL;
	struct poll_wqueues table;
	poll_table *wait;
	int retval, i, timed_out = 0;
	unsigned long slack = 0;

	rcu_read_lock();
	retval = max_select_fd(n, fds);
	rcu_read_unlock();

	if (retval < 0)
		return retval;
	n = retval;

	poll_initwait(&table);
	wait = &table.pt;
	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
		/* Zero timeout: no need to register on any wait queue. */
		wait->_qproc = NULL;
		timed_out = 1;
	}

	if (end_time && !timed_out)
		slack = select_estimate_accuracy(end_time);

	retval = 0;
	for (;;) {
		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;

		inp = fds->in; outp = fds->out; exp = fds->ex;
		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

		/* One iteration per bitmap word (__NFDBITS descriptors). */
		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
			unsigned long res_in = 0, res_out = 0, res_ex = 0;
			const struct file_operations *f_op = NULL;
			struct file *file = NULL;

			in = *inp++; out = *outp++; ex = *exp++;
			all_bits = in | out | ex;
			if (all_bits == 0) {
				/* Nothing requested in this word: skip it entirely. */
				i += __NFDBITS;
				continue;
			}

			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
				int fput_needed;
				if (i >= n)
					break;
				if (!(bit & all_bits))
					continue;
				file = fget_light(i, &fput_needed);
				if (file) {
					f_op = file->f_op;
					mask = DEFAULT_POLLMASK;
					if (f_op && f_op->poll) {
						wait_key_set(wait, in, out, bit);
						/* `wait` is handed to the file's own poll method */
						mask = (*f_op->poll)(file, wait);
					}
					fput_light(file, fput_needed);
					/* Once anything is ready, stop queueing on further wait queues. */
					if ((mask & POLLIN_SET) && (in & bit)) {
						res_in |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLOUT_SET) && (out & bit)) {
						res_out |= bit;
						retval++;
						wait->_qproc = NULL;
					}
					if ((mask & POLLEX_SET) && (ex & bit)) {
						res_ex |= bit;
						retval++;
						wait->_qproc = NULL;
					}
				}
			}
			if (res_in)
				*rinp = res_in;
			if (res_out)
				*routp = res_out;
			if (res_ex)
				*rexp = res_ex;
			/*
			 * Rescheduling point between bitmap words. The sleep that
			 * waits for a wakeup is the poll_schedule_timeout() call
			 * further down; where the wakeup comes from is analysed below.
			 */
			cond_resched();
		}
		/* After the first full pass the wait queues need not be registered again. */
		wait->_qproc = NULL;
		if (retval || timed_out || signal_pending(current))
			break;
		if (table.error) {
			retval = table.error;
			break;
		}

		/*
		 * If this is the first loop and we have a timeout
		 * given, then we convert to ktime_t and set the to
		 * pointer to the expiry value.
		 */
		if (end_time && !to) {
			expire = timespec_to_ktime(*end_time);
			to = &expire;
		}

		/* Sleep here; a zero return is treated as the timeout having expired. */
		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
					   to, slack))
			timed_out = 1;
	}

	poll_freewait(&table);

	return retval;
}


Filesystem (sysfs) poll in the kernel:

struct Sysfs_open_dirent {
atomic_t refcnt;
atomic_t event;
Wait_queue_head_t poll;
struct List_head buffers; /* goes through sysfs_buffer.list */
};

/*
 * poll method for sysfs attribute files.
 *
 * Registers the caller on the attribute's od->poll wait queue and returns
 * DEFAULT_POLLMASK. If the attribute cannot be activated, or its event
 * counter differs from the one cached in the buffer, the buffer is marked
 * for refill and POLLERR|POLLPRI are reported as well.
 */
static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
{
	struct sysfs_buffer *buffer = filp->private_data;
	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
	struct sysfs_open_dirent *od = attr_sd->s_attr.open;

	/* need parent for the kobj, grab both */
	if (!sysfs_get_active(attr_sd))
		goto trigger;

	poll_wait(filp, &od->poll, wait);	/* add to the poll wait queue */

	sysfs_put_active(attr_sd);

	/* The event counter changed since the buffer recorded it. */
	if (buffer->event != atomic_read(&od->event))
		goto trigger;

	return DEFAULT_POLLMASK;

 trigger:
	buffer->needs_read_fill = 1;
	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
}

Wakeup

void Sysfs_notify_dirent (struct sysfs_dirent *sd)
{
struct Sysfs_open_dirent *od;
unsigned long flags;

Spin_lock_irqsave (&sysfs_open_dirent_lock, flags);

OD = sd->s_attr.open;
if (OD) {
Atomic_inc (&od->event);
Wake_up_interruptible (&od->poll);//Wake up here
}

Spin_unlock_irqrestore (&sysfs_open_dirent_lock, flags);
}
EXPORT_SYMBOL_GPL (sysfs_notify_dirent);

/* Wake up the userspace poll ("xxxx" appears to be a placeholder name) */
sysfs_notify(kobj, NULL, "xxxx");



Example: How does the serial port implement blocking reads? This includes an analysis of the underlying layers.

TTY Poll:
static unsigned int n_tty_poll (struct tty_struct *tty, struct file *file,
Poll_table *wait)
{
unsigned int mask = 0;

Add read and write wait queue
Poll_wait (file, &tty->read_wait, wait);
Poll_wait (file, &tty->write_wait, wait);
if (input_available_p (TTY), Time_char (TTY), 0:min_char (TTY)))
Mask |= Pollin | Pollrdnorm;
if (Tty->packet && tty->link->ctrl_status)
Mask |= Pollpri | Pollin | Pollrdnorm;
if (Test_bit (tty_other_closed, &tty->flags))
Mask |= pollhup;
if (tty_hung_up_p (file))
Mask |= pollhup;
if (! ( Mask & (Pollhup | Pollin | Pollrdnorm)) {
if (Min_char (TTY) &&! Time_char (TTY))
Tty->minimum_to_wake = Min_char (TTY);
Else
Tty->minimum_to_wake = 1;
}
if (Tty->ops->write &&!tty_is_writelocked (TTY) &&
Tty_chars_in_buffer (TTY) < Wakeup_chars &&
Tty_write_room (TTY) > 0)
Mask |= Pollout | Pollwrnorm;
return mask;
}



Wakeup:
The reading process is woken by wake_up(&tty->read_wait).
Each time data arrives, the receive interrupt stores it in the buffer with tty_insert_flip_string(), after which the function below, flush_to_ldisc, is called.
flush_to_ldisc is then scheduled to push the data from the kernel buffer up to the upper layer.

/**
* Flush_to_ldisc
* @work: TTY structure passed from work queue.
*
* This routine are called out of the software interrupt to flush data
* from the buffer chain to the line discipline.
*
* Locking:holds Tty->buf.lock to guard buffer list. Drops the Lock
* While invoking the line discipline Receive_buf method. The
* Receive_buf method is a single threaded for each TTY instance.
*/

static void Flush_to_ldisc (struct work_struct *work)
{
struct Tty_struct *tty =
Container_of (work, struct tty_struct, buf.work);
unsigned long flags;
struct Tty_ldisc *disc;

Disc = Tty_ldisc_ref (TTY);
if (disc = = NULL)/*! TTY_LDISC * *
Return

Spin_lock_irqsave (&tty->buf.lock, flags);

if (!test_and_set_bit (tty_flushing, &tty->flags)) {
struct Tty_buffer *head;
while ((head = tty->buf.head) = NULL) {
int count;
Char *char_buf;
unsigned char *flag_buf;

Count = head->commit-head->read;
if (!count) {
if (Head->next = = NULL)
Break
Tty->buf.head = head->next;
Tty_buffer_free (TTY, head);
Continue
}
/* Ldisc or user is trying to flush the buffers
We are feeding to the ldisc, stop feeding the
Line discipline as we want to empty the queue */
if (Test_bit (tty_flushpending, &tty->flags))
Break
if (!tty->receive_room)
Break
if (Count > Tty->receive_room)
Count = tty->receive_room;
Char_buf = head->char_buf_ptr + head->read;
Flag_buf = head->flag_buf_ptr + head->read;
Head->read + = count;
Spin_unlock_irqrestore (&tty->buf.lock, flags);
Disc->ops->receive_buf (TTY, CHAR_BUF,
Flag_buf, Count);
Spin_lock_irqsave (&tty->buf.lock, flags);
}
Clear_bit (tty_flushing, &tty->flags);
}

/* We may have a deferred request to flush the input buffer,
If so pull the chain under the lock and empty the queue */
if (Test_bit (tty_flushpending, &tty->flags)) {
__tty_buffer_flush (TTY);
Clear_bit (tty_flushpending, &tty->flags);
WAKE_UP (&tty->read_wait);
}
Spin_unlock_irqrestore (&tty->buf.lock, flags);

Tty_ldisc_deref (disc);
}

As for when the wakeup happens, my personal view is that it is mostly done at the end of interrupt handling.

Implementation mechanism and example analysis of Linux Select and poll

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.