linux poll()實現分析

來源:互聯網
上載者:User

/*
 * One entry of the array handed to poll().
 *
 *   fd      - the file descriptor this entry refers to
 *   events  - events the calling process is interested in for fd
 *   revents - events reported back to the caller
 */
struct pollfd {
    int fd;
    short events;
    short revents;
};

/*
 * poll() system call entry point: converts the caller's millisecond timeout
 * into jiffies and hands the request to do_sys_poll().
 *
 * ufds:          user-space array of pollfd structures
 * nfds:          number of entries in ufds
 * timeout_msecs: timeout in milliseconds; 0 means "do not wait", a negative
 *                value means "wait indefinitely"
 *
 * Returns whatever do_sys_poll() returns.
 */
asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
            long timeout_msecs)
{
    s64 timeout_jiffies;

    /* Timeout handling: translate milliseconds into jiffies. */
    if (timeout_msecs > 0) {
#if HZ > 1000
        /* We can only overflow if HZ > 1000 */
        if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)
            timeout_jiffies = -1; /* not representable: negative means wait forever */
        else
#endif
            timeout_jiffies = msecs_to_jiffies(timeout_msecs);
    } else {
        /* Infinite (< 0) or no (0) timeout */
        timeout_jiffies = timeout_msecs;
    }

    /* The real work happens in do_sys_poll(). */
    return do_sys_poll(ufds, nfds, &timeout_jiffies);
}

/*
 * One chunk of the pollfd array copied in from user space.
 *
 *   next    - following chunk, or NULL for the last one
 *   len     - number of valid elements in entries
 *   entries - the pollfd records; storage is allocated together with the
 *             header (sizeof(struct poll_list) + len * sizeof(struct pollfd))
 */
struct poll_list {
    struct poll_list *next;
    int len;
    /* C99 flexible array member instead of the GNU zero-length array. */
    struct pollfd entries[];
};

/*
 * Worker behind sys_poll(): copies the caller's pollfd array into kernel
 * memory as a chain of poll_list chunks, runs do_poll() on it, then writes
 * the revents fields back to user space and frees the chunks.
 *
 * ufds:    user-space array of pollfd structures
 * nfds:    number of entries in ufds
 * timeout: timeout in jiffies, passed through to do_poll()
 *
 * Returns the value produced by do_poll() on success, or
 *   -EINVAL if nfds exceeds the RLIMIT_NOFILE soft limit,
 *   -ENOMEM if a chunk cannot be allocated,
 *   -EFAULT if copying from or to user space fails,
 *   -EINTR  if do_poll() reported nothing and a signal is pending.
 */
int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
{
    struct poll_wqueues table;
     int fdcount, err;
     unsigned int i;
    struct poll_list *head;
     struct poll_list *walk;
    /* Allocate small arguments on the stack to save memory and be
       faster - use long to make sure the buffer is aligned properly
       on 64 bit archs to avoid unaligned access */
    long stack_pps[POLL_STACK_ALLOC/sizeof(long)]; /* allocating on the stack is faster */
    struct poll_list *stack_pp = NULL;

    /* Check that the number of descriptors does not exceed the system limit. */
    /* Do a sanity check on nfds ... */
    if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
        return -EINVAL;

    /* Initialisation first: mainly sets up the poll_table function pointer. */
    poll_initwait(&table);

    head = NULL;
    walk = NULL;
    i = nfds;
    err = -ENOMEM; /* preset so an allocation failure can jump straight to out_fds */
    /*
     * Copy the pollfd structures passed in by the user into the kernel.
     * There may be more of them than fit into one page of memory, so this
     * is done in a loop, copying at most one page worth per iteration and
     * linking the chunks into a list. i counts the entries still to copy.
     */
    while(i!=0) {
        struct poll_list *pp;
        int num, size;
        if (stack_pp == NULL)
            num = N_STACK_PPS;
        else
            num = POLLFD_PER_PAGE; /* keeps each kmalloc() within one page (per the constant's name) */
        if (num > i)
            num = i;
        size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;

        /*
         * The first chunk always uses the on-stack buffer, whether the
         * total number of descriptors is small or large; later chunks
         * come from kmalloc().
         */
        if (!stack_pp)
            stack_pp = pp = (struct poll_list *)stack_pps;
        else {
            pp = kmalloc(size, GFP_KERNEL);
            if (!pp)
                goto out_fds;
        }
        pp->next=NULL;
        pp->len = num;
        if (head == NULL)
            head = pp;
        else
            walk->next = pp;

        walk = pp;
        /* nfds - i entries have been copied so far; continue from there. */
        if (copy_from_user(pp->entries, ufds + nfds-i,
                sizeof(struct pollfd)*num)) {
            err = -EFAULT;
            goto out_fds;
        }
        i -= pp->len;
    }

    /* The actual poll operation; the results are left in the list at head. */
    fdcount = do_poll(nfds, head, &table, timeout);

    /* Nested loops: copy the reported events back to user space. */
    /* OK, now copy the revents fields back to user space. */
    walk = head;
    err = -EFAULT; /* returned if any __put_user() below fails */
    while(walk != NULL) {
        struct pollfd *fds = walk->entries;
        int j;

        for (j=0; j < walk->len; j++, ufds++) {
            if(__put_user(fds[j].revents, &ufds->revents))
                goto out_fds;
        }
        walk = walk->next;
    }
    err = fdcount;
    if (!fdcount && signal_pending(current))
        err = -EINTR;

    /* Release the chunks; the stack-resident one must not be kfree()d. */
out_fds:
    walk = head;
    while(walk!=NULL) {
        struct poll_list *pp = walk->next;
        if (walk != stack_pp)
            kfree(walk);
        walk = pp;
    }
    poll_freewait(&table);
    return err;
}

/*
 * Queueing callback used by poll(): adds the current task to a wait queue
 * supplied by a driver, a filesystem or a network protocol stack.
 * It is not called directly; a file's ->poll method reaches it indirectly
 * by calling poll_wait().
 *
 * filp:         file being polled
 * wait_address: wait queue head to sleep on
 * p:            poll table from which a poll_table_entry is obtained
 *
 * Does nothing if no entry can be obtained from poll_get_entry().
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
                poll_table *p)
{
    struct poll_table_entry *slot;

    slot = poll_get_entry(p);
    if (slot) {
        /*
         * get_file() is called before the file is recorded in the entry;
         * the matching release is not visible in this excerpt.
         */
        get_file(filp);
        slot->filp = filp;
        slot->wait_address = wait_address;
        init_waitqueue_entry(&slot->wait, current);
        add_wait_queue(wait_address, &slot->wait);
    }
}

void poll_initwait(struct poll_wqueues *pwq)
{
    //在poll()中初始化為__pollwait(),注意在epoll中又會不同
    init_poll_funcptr(&pwq->pt, __pollwait);
    pwq->error = 0;
    pwq->table = NULL;
    pwq->inline_index = 0;
}

===========================================

/*
 * Core loop of poll(): scan every descriptor in the list and, if nothing is
 * ready, sleep and scan again until an event, an error, a signal or the
 * timeout ends the wait.
 *
 * nfds:    number of descriptors (not used inside this function)
 * list:    chain of pollfd chunks to scan
 * wait:    wait-queue bookkeeping; &wait->pt is handed to do_pollfd() during
 *          the first scan only, and wait->error is checked after each scan
 * timeout: timeout in jiffies; 0 = do not sleep, < 0 = sleep indefinitely.
 *          Non-negative values are updated in place as time is consumed.
 *
 * Returns the number of descriptors for which do_pollfd() reported a
 * non-zero mask, or wait->error if nothing was ready and that is non-zero.
 */
static int do_poll(unsigned int nfds, struct poll_list *list,
           struct poll_wqueues *wait, s64 *timeout)
{
    int count = 0;
    poll_table *pt = &wait->pt;

    /* Optimise the no-wait case */
    if (!(*timeout))   /* zero timeout: the caller will not sleep, so do not register waiters */
        pt = NULL;

    for (;;) {
        struct poll_list *walk;
        long __timeout;

        /*
         * Another nested loop: handle the events of every file descriptor.
         * The task state is set before the scan, presumably so that a
         * wake-up arriving while scanning is not lost.
         */
        set_current_state(TASK_INTERRUPTIBLE);
        for (walk = list; walk != NULL; walk = walk->next) {
            struct pollfd * pfd, * pfd_end;

            pfd = walk->entries;
            pfd_end = pfd + walk->len;
            for (; pfd != pfd_end; pfd++) {
                /*
                * Fish for events. If we found one, record it
                * and kill the poll_table, so we don't
                * needlessly register any other waiters after
                * this. They'll get immediately deregistered
                * when we break out and return.
                */
                if (do_pollfd(pfd, pt)) { /* examine one file descriptor */
                    count++;
                    pt = NULL;
                }
            }
        }

        /* Decide whether to return or to sleep (timeout handling). */
        /*
        * All waiters have already been registered, so don't provide
        * a poll_table to them on the next loop iteration.
        */
        pt = NULL;
        if (count || !*timeout || signal_pending(current))
            break;
        count = wait->error;
        if (count)
            break;

        if (*timeout < 0) {
            /* Wait indefinitely */
            __timeout = MAX_SCHEDULE_TIMEOUT;
        } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {
            /*
            * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in
            * a loop
            */
            __timeout = MAX_SCHEDULE_TIMEOUT - 1;
            *timeout -= __timeout;
        } else {
            __timeout = *timeout;
            *timeout = 0;
        }
        /* Give up the CPU; the task sleeps here. */
        __timeout = schedule_timeout(__timeout);
        /*
         * Woken up again: add the value returned by schedule_timeout()
         * (presumably the unexpired part of the sleep) back to the
         * remaining timeout, unless the wait is infinite.
         */
        if (*timeout >= 0)
            *timeout += __timeout;
    }
    __set_current_state(TASK_RUNNING);
    return count;
}

/*
 * Query a single pollfd entry for pending events.
 *
 * Only events selected by pollfd->events, plus POLLERR and POLLHUP, are
 * reported. The resulting mask is stored in pollfd->revents and is also the
 * return value. If pwait is non-NULL it is forwarded to the file's ->poll
 * method, which may use it to register a waiter.
 *
 * Special cases:
 *   - a negative fd is ignored and yields 0;
 *   - an fd for which fget_light() finds no file yields POLLNVAL;
 *   - a file without a ->poll method reports DEFAULT_POLLMASK (filtered by
 *     the requested events like any other result).
 */
static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
{
    unsigned int ready = 0;
    int fd = pollfd->fd;

    if (fd >= 0) {
        int need_fput;
        struct file *filp = fget_light(fd, &need_fput);

        if (filp == NULL) {
            ready = POLLNVAL;
        } else {
            /*
             * Ask the driver or filesystem. Whether the current task is
             * added to the driver's wait queue depends on whether the
             * second argument of ->poll is NULL.
             */
            if (filp->f_op && filp->f_op->poll)
                ready = filp->f_op->poll(filp, pwait);
            else
                ready = DEFAULT_POLLMASK;

            /* Drop events the caller did not ask for. */
            ready &= pollfd->events | POLLERR | POLLHUP;
            fput_light(filp, need_fput);
        }
    }

    /* Publish the result in the entry that is copied back to the caller. */
    pollfd->revents = ready;

    /* Non-zero when any reportable event (or POLLNVAL) is present. */
    return ready;
}

=================================
驅動或檔案系統的poll()實現原型:
/*
 * Skeleton of a ->poll method as a driver or filesystem would implement it.
 * This is an illustrative outline, not a complete function: the elided parts
 * of the original sketch are kept as placeholder comments, and 'dev' stands
 * for the driver's per-device state, which must be obtained by the elided
 * code.
 *
 * filep: file being polled
 * wait:  poll table passed down by the poll core; may be NULL
 *
 * Returns the mask of events that are currently ready.
 */
unsigned int test_poll(struct file *filep, poll_table *wait)
{
    unsigned int mask = 0;

    /* ... */

    /* Register on the device's wait queue; poll_wait() does nothing when wait is NULL. */
    poll_wait(filep, &dev->wait_queue_head, wait);

    /* ... */

    if (dev->readable)
        mask |= POLLIN | POLLRDNORM;

    if (dev->writable)
        mask |= POLLOUT | POLLWRNORM;

    /* ... */

    return mask;
}

/*
 * Helper called from ->poll methods: forwards to the queueing callback stored
 * in the poll table. Does nothing when either the poll table or the wait
 * queue head is NULL.
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
    if (!p || !wait_address)
        return;

    /*
     * For poll() this callback is __pollwait(), which poll_initwait()
     * registers through init_poll_funcptr().
     */
    p->qproc(filp, wait_address, p);
}

 

 

 

來自:http://dev.firnow.com/course/6_system/linux/Linuxjs/2008929/146653.html

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.