標準的檔案讀過程(read)

來源:互聯網
上載者:User

1、準備工作。通過VFS層,及一些初始化操作,為真正的讀操作做準備。

首先是使用者進程通過read系統調用發出一個讀請求:

檔案/fs/read_write.c
/*
 * read() system call entry point.
 *
 * Looks up the struct file for fd, reads the file's current position, lets
 * vfs_read() do the work, then stores the (possibly advanced) position back
 * and drops the file reference.
 *
 * Returns whatever vfs_read() returns, or -EBADF when fget_light() finds no
 * file for fd.
 */
asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
{
    int fput_needed;
    struct file *file = fget_light(fd, &fput_needed);
    loff_t pos;
    ssize_t nread;

    if (!file)
        return -EBADF;

    pos = file_pos_read(file);
    nread = vfs_read(file, buf, count, &pos);
    file_pos_write(file, pos);
    fput_light(file, fput_needed);

    return nread;
}

然後通過VFS層操作:
/*
 * VFS-level read: validate the request, then dispatch to the file's own
 * read method.
 *
 * Returns:
 *   -EBADF   if the file was not opened with FMODE_READ;
 *   -EINVAL  if the file has no f_op, or neither ->read nor ->aio_read;
 *   -EFAULT  if access_ok() rejects the user buffer;
 *   the non-zero result of rw_verify_area() or security_file_permission();
 *   otherwise the result of ->read(), or of do_sync_read() when ->read is
 *   not provided.
 *
 * Once the permission checks pass, current->syscr is incremented; on a
 * positive result the access is reported through fsnotify_access() and the
 * byte count is added to current->rchar.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
    ssize_t ret;

    if (!(file->f_mode & FMODE_READ))
        return -EBADF;
    if (!file->f_op)
        return -EINVAL;
    if (!file->f_op->read && !file->f_op->aio_read)
        return -EINVAL;
    /* read() stores into the user buffer, hence VERIFY_WRITE */
    if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
        return -EFAULT;

    ret = rw_verify_area(READ, file, pos, count);
    if (ret)
        return ret;

    ret = security_file_permission (file, MAY_READ);
    if (ret)
        return ret;

    /* Prefer the file's ->read; otherwise go through ->aio_read. */
    if (file->f_op->read)
        ret = file->f_op->read(file, buf, count, pos);
    else
        ret = do_sync_read(file, buf, count, pos);

    if (ret > 0) {
        fsnotify_access(file->f_dentry);
        current->rchar += ret;
    }
    current->syscr++;

    return ret;
}

do_sync_read最後還是調用了aio_read()介面函數來完成讀操作,即在2.6中,aio_read()為同步和非同步讀操作的通用介面。
/*
 * Perform a read through the file's ->aio_read method and wait for it.
 *
 * A kiocb on the stack is initialised for filp and seeded with *ppos. If
 * ->aio_read() answers -EIOCBQUEUED, the result is obtained from
 * wait_on_sync_kiocb() instead. The kiocb's final position is written back
 * to *ppos in every case.
 */
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
    struct kiocb req;
    ssize_t nread;

    init_sync_kiocb(&req, filp);
    req.ki_pos = *ppos;

    nread = filp->f_op->aio_read(&req, buf, len, req.ki_pos);
    if (nread == -EIOCBQUEUED)
        nread = wait_on_sync_kiocb(&req);

    *ppos = req.ki_pos;
    return nread;
}

對於ext2檔案系統,有:
/*
 * File operations table for ext2, as quoted in this excerpt. The .read/.write
 * and .aio_read/.aio_write members all point at generic_file_* helpers, so
 * vfs_read() (which checks ->read first) ends up in generic_file_read().
 * The remaining members are elided (".......") -- this is not the complete
 * initializer.
 */
const struct file_operations ext2_file_operations = {
       .llseek   = generic_file_llseek,
       .read     = generic_file_read,
       .write    = generic_file_write,
       .aio_read = generic_file_aio_read,
       .aio_write = generic_file_aio_write,
       .......
}

檔案/mm/filemap.c
/*
 * ->read implementation built on __generic_file_aio_read().
 *
 * Wraps the user buffer in a single-element iovec, runs the request with a
 * kiocb on the stack, and, if the request was queued (-EIOCBQUEUED), returns
 * the result of wait_on_sync_kiocb() instead.
 */
ssize_t
generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
    struct kiocb req;
    struct iovec vec = {
        .iov_base = buf,
        .iov_len  = count,
    };
    ssize_t nread;

    init_sync_kiocb(&req, filp);
    nread = __generic_file_aio_read(&req, &vec, 1, ppos);
    if (nread != -EIOCBQUEUED)
        return nread;

    return wait_on_sync_kiocb(&req);
}

/*
 * Excerpt of __generic_file_aio_read(): only the call that hands the request
 * to do_generic_file_read(), with file_read_actor as the copy callback, is
 * shown. Everything before and after it (including where filp and desc come
 * from and what is returned) is elided ("......") in this listing.
 */
ssize_t
__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
        unsigned long nr_segs, loff_t *ppos)
{
    ......
    do_generic_file_read(filp,ppos,&desc,file_read_actor);
    ......
}

檔案/include/linux/fs.h
/*
 * Convenience wrapper around do_generic_mapping_read(): supplies the file's
 * own address_space (filp->f_mapping) and readahead state (&filp->f_ra) and
 * forwards the remaining arguments unchanged.
 */
static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
                    read_descriptor_t * desc,
                    read_actor_t actor)
{
    struct address_space *mapping = filp->f_mapping;
    struct file_ra_state *ra = &filp->f_ra;

    do_generic_mapping_read(mapping, ra, filp, ppos, desc, actor);
}

2、讀入操作。完成了上面的準備工作,下一步就是執行讀操作的核心函數do_generic_mapping_read,這是一個比較複雜的函數,裡面有大量的goto跳轉,但還是比較清晰的。

它的工作過程可以描述如下:

a. 如果所要讀取的檔案在頁面緩衝中,則跳轉到步驟d。

b. 檔案還沒有被緩衝,所以要從裝置中去讀取:首先分配一個頁面,並將這個頁面鏈入到相應的address_space中去。

c. 然後調用address_space中的readpage()函數,去從裝置中讀出一個頁面大小的資料到這個頁面緩衝中。

d. 檢查PageUptodate(page),確認頁面中的資料是否有效;若尚未更新,則先鎖住頁面,再調用readpage()重新讀取(若鎖住後發現頁面已被其他路徑填好,則直接使用)。

e. 調用由參數傳入的actor函數指標,在此為file_read_actor(),將資料從頁面緩衝中拷貝到使用者緩衝區。

f. 如果請求讀取的資料長度已完成,則函數返回,否則跳轉到步驟a重複執行。

先看看file_read_actor():

/*
 * Read actor: copy up to `size` bytes from `page`, starting at `offset`, to
 * the user buffer described by `desc`, then advance the descriptor.
 *
 * The request is first clamped to desc->count. Part of the function is
 * elided ("……") in this excerpt; the `success` label is presumably the
 * target of a jump inside the elided part -- confirm against the full source.
 *
 * Returns the number of bytes accounted as copied. When __copy_to_user()
 * reports bytes left uncopied, that amount is subtracted from the result and
 * desc->error is set to -EFAULT.
 */
int file_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size)

{

       char *kaddr;
       unsigned long left, count = desc->count;

       /* never copy more than the caller still wants */
       if (size > count)
              size = count;
       ……

       /* Do it the slow way */
       kaddr = kmap(page);
       left = __copy_to_user(desc->arg.buf, kaddr + offset, size); // copy the data to user space
       kunmap(page);

       /* `left` bytes could not be copied: report a short copy plus -EFAULT */
       if (left) {
              size -= left;
              desc->error = -EFAULT;

       }

success:
       /* consume `size` bytes from the descriptor: less remaining, more written */
       desc->count = count - size;
       desc->written += size;
       desc->arg.buf += size;
       return size;

}

檔案/mm/filemap.c
/**
* This is a generic file read routine, and uses the
* mapping->a_ops->readpage() function for the actual low-level stuff.
*
* Walks the page cache of @mapping page by page, starting at *@ppos, and
* hands each usable page to @actor until desc->count is exhausted, the end
* of the file (i_size) is reached, or an error is recorded in desc->error.
* On exit *@ppos is advanced past what was consumed and the readahead state
* is written back to *@_ra.
*
* NOTE: this listing is an excerpt; part of the readpage path is elided
* ("……"), so some labels and variables have no visible user here.
*/
void do_generic_mapping_read(struct address_space *mapping,
                          struct file_ra_state *_ra,
                          struct file *filp,
                          loff_t *ppos,
                          read_descriptor_t *desc,
                          read_actor_t actor)
{
       struct inode *inode = mapping->host;
       unsigned long index;
       unsigned long end_index;
       unsigned long offset;
       unsigned long last_index;
       unsigned long next_index;
       unsigned long prev_index;
       loff_t isize;
       struct page *cached_page;
       int error;
       /* work on a local copy of the readahead state; stored back at `out` */
       struct file_ra_state ra = *_ra;

       cached_page = NULL;
       /* page index of the starting position */
       index = *ppos >> PAGE_CACHE_SHIFT;
       next_index = index;
       /* NOTE(review): prev_index is not referenced again in the visible part */
       prev_index = ra.prev_page;
       /* page index just past the end of the request (rounded up) */
       last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
       /* byte offset of the starting position inside its page */
       offset = *ppos & ~PAGE_CACHE_MASK;

       /* empty file: nothing to read */
       isize = i_size_read(inode);
       if (!isize)
              goto out;

       /* index of the page holding the last byte of the file */
       end_index = (isize - 1) >> PAGE_CACHE_SHIFT;

       for (;;) {
              struct page *page;
              unsigned long nr, ret;

              /* nr is the maximum number of bytes to copy from this page */
              nr = PAGE_CACHE_SIZE;
              if (index >= end_index) {
                     /* beyond the last page of the file */
                     if (index > end_index)
                            goto out;

                     /* last page: only the bytes up to i_size are valid */
                     nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
                     if (nr <= offset) {
                            goto out;
                     }
              }
              nr = nr - offset;
              cond_resched();
              /* reached the index returned by the previous readahead call: ask again */
              if (index == next_index)
                     next_index = page_cache_readahead(mapping, &ra, filp, index, last_index - index);

find_page:
              page = find_get_page(mapping, index); // look the page up in the page cache
              if (unlikely(page == NULL)) {
                     handle_ra_miss(mapping, &ra, index);
                     goto no_cached_page; // not found
              }
              if (!PageUptodate(page)) // cached, but not marked up to date
                     goto page_not_up_to_date;

page_ok: // the page is cached and up to date
              ret = actor(desc, page, offset, nr); // actor copies the data to the user buffer
              // advance offset/index by the number of bytes the actor consumed
              offset += ret;
              index += offset >> PAGE_CACHE_SHIFT;
              offset &= ~PAGE_CACHE_MASK;
              page_cache_release(page);
              if (ret == nr && desc->count)
                     continue; // full copy and more requested: next iteration
              goto out; // done (request satisfied or short copy)

page_not_up_to_date:
              /* Get exclusive access to the page ... */
              lock_page(page);
              /* Did it get truncated before we got the lock? */
              if (!page->mapping) {
                     unlock_page(page);
                     page_cache_release(page);
                     continue;
              }

              /* Did somebody else fill it already? */
              if (PageUptodate(page)) {
                     unlock_page(page);
                     goto page_ok;
              }

readpage: // issue the read

              /* Start the actual read. The read will unlock the page. */
              error = mapping->a_ops->readpage(filp, page); // the actual read from the device
              /* (code elided in this excerpt) */
              ……             
              /* nr is the maximum number of bytes to copy from this page */
              nr = PAGE_CACHE_SIZE;
              if (index == end_index) {
                     nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
                     if (nr <= offset) {
                            page_cache_release(page);
                            goto out;
                     }
              }
              nr = nr - offset;
              goto page_ok;

              /*
               * NOTE(review): no jump to readpage_error is visible in this
               * excerpt; presumably it is reached from the elided code above.
               */
readpage_error:
              /* UHHUH! A synchronous read error occurred. Report it */
              desc->error = error;
              page_cache_release(page);
              goto out;

no_cached_page: // allocate a new page and insert it into the page cache
              /*
              * Ok, it wasn't cached, so we need to create a new
              * page..
              */

              if (!cached_page) {
                     cached_page = page_cache_alloc_cold(mapping);
                     if (!cached_page) {
                            desc->error = -ENOMEM;
                            goto out;
                     }
              }
              /*
               * NOTE(review): in this excerpt the result of
               * add_to_page_cache_lru() is stored in `error` but not examined
               * before jumping to readpage.
               */
              error = add_to_page_cache_lru(cached_page, mapping, index, GFP_KERNEL);
              /* the page is now in use as `page`; clear cached_page so `out` won't release it */
              page = cached_page;
              cached_page = NULL;
              goto readpage;
       }

out:
       /* publish the updated readahead state and the new file position */
       *_ra = ra;
       *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
       /* release a preallocated page that was never inserted */
       if (cached_page)
              page_cache_release(cached_page);
       if (filp)
              file_accessed(filp);
}

3、從裝置讀取

不同的檔案系統有不同的address_space,也有各自的address_space_operations。對於ext2檔案系統來說,其address_space_operations結構如下:

struct address_space_operations ext2_aops = {
    .readpage        = ext2_readpage,
    .readpages       = ext2_readpages,
    .writepage       = ext2_writepage,
    .sync_page       = block_sync_page,
    .prepare_write   = ext2_prepare_write,
    .commit_write    = generic_commit_write,
    .bmap            = ext2_bmap,
    .direct_IO       = ext2_direct_IO,
    .writepages      = ext2_writepages,
};

可見,這個readpage()便是ext2_readpage(),它負責從裝置中讀取一個頁面。

/*
 * ->readpage for ext2: delegates to mpage_readpage(), passing ext2_get_block
 * as the block-mapping callback.  The `file` argument is not used.
 */
static int ext2_readpage(struct file *file, struct page *page)
{
       int status = mpage_readpage(page, ext2_get_block);

       return status;
}

/*
 * This isn't called much at all
 *
 * Read a single page: build a bio for it with do_mpage_readpage(), starting
 * from no bio and an unmapped buffer_head, and submit whatever bio comes
 * back as a READ.  Always returns 0.
 */
int mpage_readpage(struct page *page, get_block_t get_block)
{
       struct buffer_head bh;
       struct bio *bio;
       sector_t last_block = 0;
       unsigned long first_block = 0;

       clear_buffer_mapped(&bh);

       bio = do_mpage_readpage(NULL, page, 1, &last_block, &bh, &first_block, get_block);
       if (bio != NULL)
              mpage_bio_submit(READ, bio);

       return 0;
}

這個函數最終將讀請求轉成submit_bio(),之後就是通用塊層的事情了。

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.