Standard File Read process (read)

Source: Internet
Author: User

1. Preparation. The VFS layer performs some checks and initialization to prepare for the actual read operation.

First, the user process sends a read request through the read system call:

File: fs/read_write.c
Asmlinkage ssize_t sys_read (unsigned int FD, char _ User * Buf, size_t count)
{
Struct file * file;
Ssize_t ret =-ebadf;
Int fput_needed;

File = fget_light (FD, & fput_needed );
If (File ){
Loff_t Pos = file_pos_read (File );
Ret = vfs_read (file, Buf, Count, & Pos );
File_pos_write (file, POS );
Fput_light (file, fput_needed );
}

Return ret;
}

Then, perform operations at the VFS layer:
Ssize_t vfs_read (struct file * file, char _ User * Buf, size_t count, loff_t * POS)
{
Ssize_t ret;

If (! (File-> f_mode & fmode_read ))
Return-ebadf;
If (! File-> f_op | (! File-> f_op-> read &&! File-> f_op-> aio_read ))
Return-einval;
If (unlikely (! Access_ OK (verify_write, Buf, count )))
Return-efault;

Ret = rw_verify_area (read, file, POs, count );
If (! RET ){
Ret = security_file_permission (file, may_read );
If (! RET ){
If (file-> f_op-> Read)
Ret = file-> f_op-> Read (file, Buf, Count, POS );
Else
Ret = do_sync_read (file, Buf, Count, POS );
If (Ret> 0 ){
Fsnotify_access (file-> f_dentry );
Current-> rchar + = ret;
}
Current-> syscr ++;
}
}

Return ret;
}

do_sync_read() ultimately calls the aio_read() interface function to complete the read operation. That is, in 2.6, aio_read() is the common interface for both synchronous and asynchronous read operations.
Ssize_t do_sync_read (struct file * filp, char _ User * Buf, size_t Len, loff_t * PPOs)
{
Struct kiocb;
Ssize_t ret;

Init_sync_kiocb (& kiocb, filp );
Kiocb. ki_pos = * PPOs;
Ret = filp-> f_op-> aio_read (& kiocb, Buf, Len, kiocb. ki_pos );
If (-eiocbqueued = RET)
Ret = wait_on_sync_kiocb (& kiocb );
* PPOs = kiocb. ki_pos;
Return ret;
}

For ext2 file systems, there are:
Const struct file_operations ext2_file_operations = {
. Llseek = generic_file_llseek,
. Read = generic_file_read,
. Write = generic_file_write,
. Aio_read = generic_file_aio_read,
. Aio_write = generic_file_aio_write,
.......
}

File: mm/filemap.c
Ssize_t
Generic_file_read (struct file * filp, char _ User * Buf, size_t count, loff_t * PPOs)
{
Struct iovec local_iov = {. iov_base = Buf,. iov_len = count };
Struct kiocb;
Ssize_t ret;

Init_sync_kiocb (& kiocb, filp );
Ret = _ generic_file_aio_read (& kiocb, & local_iov, 1, PPOs );
If (-eiocbqueued = RET)
Ret = wait_on_sync_kiocb (& kiocb );
Return ret;
}

Ssize_t
_ Generic_file_aio_read (struct kiocb * iocb, const struct iovec * IOV,
Unsigned long nr_segs, loff_t * PPOs)
{
......
Do_generic_file_read (filp, PPOs, & DESC, file_read_actor );
......
}

File: include/linux/fs.h
Static inline void do_generic_file_read (struct file * filp, loff_t * PPOs,
Read_descriptor_t * DESC,
Read_actor_t actor)
{
Do_generic_mapping_read (filp-> f_mapping,
& Filp-> f_ra,
Filp,
PPOs,
DESC,
Actor );
}

2. The read operation. After the preparations above, the next step is to execute the core function of the read path, do_generic_mapping_read(). It is a complicated function with a large number of goto jumps, but its logic is clear.

It can be described as follows:

A. If the page to be read is already in the page cache, jump to step D.

B. The page has not been cached yet. To read it from the device, first allocate a page and link it into the corresponding address_space.

C. Call the readpage() function of the address_space to read one page of data from the device into the page cache.

D. Check PageUptodate(page).

E. Call the actor function pointer passed in as a parameter. Here it is file_read_actor(), which copies the data in the page cache to the user buffer.

F. If the requested amount of data has been read, the function returns. Otherwise, it jumps back to step A and repeats.

Let's take a look at file_read_actor ():

/*
 * Read actor: copy up to `size` bytes, starting at `offset` within
 * `page`, to the user buffer described by `desc`.
 *
 * The copy is clamped to desc->count. On a partial copy desc->error is
 * set to -EFAULT and only the bytes actually copied are accounted.
 * desc->count, desc->written and desc->arg.buf are advanced by the
 * number of bytes copied, which is also the return value.
 */
int file_read_actor(read_descriptor_t *desc, struct page *page,
		    unsigned long offset, unsigned long size)
{
	char *kaddr;
	unsigned long left, count = desc->count;

	if (size > count)
		size = count;

	/* ... omitted in this excerpt (the `success` label below is its target) ... */

	/* Do it the slow way */
	kaddr = kmap(page);
	/* Copy data to user space; `left` is the number of bytes NOT copied. */
	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
	kunmap(page);

	if (left) {
		size -= left;
		desc->error = -EFAULT;
	}

success:
	desc->count = count - size;
	desc->written += size;
	desc->arg.buf += size;
	return size;
}

File: mm/filemap.c
/**
* This is a generic File Read routine, and uses
* Mapping-> a_ops-> readpage () function for the actual low-level stuff.
*/
Void do_generic_mapping_read (struct address_space * mapping,
Struct file_ra_state * _ Ra,
Struct file * filp,
Loff_t * PPOs,
Read_descriptor_t * DESC,
Read_actor_t actor)
{
Struct inode * inode = mapping-> host;
Unsigned long index;
Unsigned long end_index;
Unsigned long offset;
Unsigned long last_index;
Unsigned long next_index;
Unsigned long prev_index;
Loff_t isize;
Struct page * cached_page;
Int error;
Struct file_ra_state Ra = * _ RA;

Cached_page = NULL;
Index = * PPOs> page_cache_shift;
Next_index = index;
Prev_index = Ra. prev_page;
Last_index = (* PPOs + desc-> count + PAGE_CACHE_SIZE-1)> page_cache_shift;
Offset = * PPOs &~ Page_cache_mask;

Isize = I _size_read (inode );
If (! Isize)
Goto out;

End_index = (isize-1)> page_cache_shift;

For (;;){
Struct page * page;
Unsigned long NR, RET;

/* Nr is the maximum number of bytes to copy from this page */
Nr = page_cache_size;
If (index> = end_index ){
If (index> end_index)
Goto out;

Nr = (isize-1 )&~ Page_cache_mask) + 1;
If (NR <= offset ){
Goto out;
}
}
Nr = nR-offset;
Cond_resched ();
If (Index = next_index)
Next_index = page_cache_readahead (mapping, & RA, filp, index, last_index-index );

Find_page:
Page = find_get_page (mapping, index); // search in the cache
If (unlikely (page = NULL )){
Handle_ra_miss (mapping, & RA, index );
Goto no_cached_page; // No
}
If (! Pageuptodate (page) // uptodate
Goto page_not_up_to_date;

Page_ OK: // The cache page is found.
Ret = actor (DESC, page, offset, NR); // copy data to the user buffer
// Update some variable values
Offset + = ret;
Index + = offset> page_cache_shift;
Offset & = ~ Page_cache_mask;
Page_cache_release (PAGE );
If (ret = nR & desc-> count)
Continue; // unfinished, entering the next cycle
Goto out; // complete

Page_not_up_to_date:
/* Get exclusive access to the page ...*/
Lock_page (PAGE );
/* Did it get truncated before we got the lock? */
If (! Page-> mapping ){
Unlock_page (PAGE );
Page_cache_release (PAGE );
Continue;
}

/* Did somebody else fill it already? */
If (pageuptodate (page )){
Unlock_page (PAGE );
Goto page_ OK;
}

Readpage: // read operation

/* Start the actual read. The read will unlock the page .*/
Error = mapping-> a_ops-> readpage (filp, page); // real read Operations
......
/* Nr is the maximum number of bytes to copy from this page */
Nr = page_cache_size;
If (Index = end_index ){
Nr = (isize-1 )&~ Page_cache_mask) + 1;
If (NR <= offset ){
Page_cache_release (PAGE );
Goto out;
}
}
Nr = nR-offset;
Goto page_ OK;

Readpage_error:
/* Uhhuh! A synchronous read error occurred. Report it */
Desc-> error = error;
Page_cache_release (PAGE );
Goto out;

No_cached_page: // allocate a new page than link it to the cache tree.
/*
* OK, it wasn' t cached, so we need to create a new
* Page ..
*/

If (! Cached_page ){
Cached_page = page_cache_alloc_cold (Mapping );
If (! Cached_page ){
Desc-> error =-enomem;
Goto out;
}
}
Error = add_to_page_cache_lru (cached_page, mapping, index, gfp_kernel );
Page = cached_page;
Cached_page = NULL;
Goto readpage;
}

Out:
* _ Ra = Ra;
* PPOs = (loff_t) index <page_cache_shift) + offset;
If (cached_page)
Page_cache_release (cached_page );
If (filp)
File_accessed (filp );
}

3. Read from the device

Different file systems have different address_space and different address_space_operations. For ext2 file systems, this is shown in the following structure:

Struct address_space_operations ext2_aops = {
. Readpage = ext2_readpage,
. Readpages = ext2_readpages,
. Writepage = ext2_writepage,
. Sync_page = block_sync_page,
. Prepare_write = ext2_prepare_write,
. Commit_write = generic_commit_write,
. Bmap = ext2_bmap,
. Direct_io = ext2_direct_io,
. Writepages = ext2_writepages,
};

This readpage () is ext2_readpage (), which reads a page from the device.

Static int ext2_readpage (struct file * file, struct page * Page)
{
Return mpage_readpage (page, ext2_get_block );
}

/*
 * This isn't called much at all
 *
 * Read a single page: let do_mpage_readpage() build a bio for it using
 * get_block, and submit that bio as a READ if one is returned.
 * Always returns 0.
 */
int mpage_readpage(struct page *page, get_block_t get_block)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct buffer_head map_bh;
	unsigned long first_logical_block = 0;

	/* map_bh lives on the stack; clear its mapped flag before first use. */
	clear_buffer_mapped(&map_bh);

	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
				&map_bh, &first_logical_block, get_block);
	if (bio)
		mpage_bio_submit(READ, bio);

	return 0;
}

This function eventually turns the read request into a submit_bio() call, which hands it over to the generic block layer.

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.