In the previous post we looked in detail at the data structures involved in mounting a file system (this article uses "install" to mean "mount"). Now let's see how a specific file system is actually mounted.
1. Install a Common File System
Install the file system by calling the do_mount () function in sys_mount:
Retval = do_mount (char *) dev_page, dir_page, (char *) type_page,
Flags, (void *) data_page );
The do_mount() function does the real work of the mount operation. It performs the following steps:
Long do_mount (char * dev_name, char * dir_name, char * type_page,
Unsigned long flags, void * data_page)
{
Struct nameidata Nd;
Int retval = 0;
Int mnt_flags = 0;
/* Discard magic */
If (flags & ms_mgc_msk) = ms_mgc_val)
Flags & = ~ Ms_mgc_msk;
/* Basic sanity checks */
If (! Dir_name |! * Dir_name |! Memchr (dir_name, 0, page_size ))
Return-einval;
If (dev_name &&! Memchr (dev_name, 0, page_size ))
Return-einval;
If (data_page)
(Char *) data_page) [page_size-1] = 0;
/* If any of the installation marks ms_nosuid, ms_nodev, ms_noatime, ms_nodiratime, ms_nodev, or ms_noexec in the installed file system object is set,
* Clear them and set the corresponding flag (mnt_nosuid, mnt_nodev, mnt_noexec, mnt_noatime, mnt_nodiratime) in the installed file system object ). */
If (flags & ms_nosuid)
Mnt_flags | = mnt_nosuid;
If (flags & ms_nodev)
Mnt_flags | = mnt_nodev;
If (flags & ms_noexec)
Mnt_flags | = mnt_noexec;
If (flags & ms_noatime)
Mnt_flags | = mnt_noatime;
If (flags & ms_nodiratime)
Mnt_flags | = mnt_nodiratime;
Flags & = ~ (Ms_nosuid | ms_noexec | ms_nodev | ms_active |
Ms_noatime | ms_nodiratime );
/*... And get the mountpoint call path_lookup () to find the path name of the Installation Point;
* This function stores the results of path name search in the local variable nd of the nameidata type (see "path name search" in the next blog "). */
Retval = path_lookup (dir_name, lookup_follow, & Nd );
If (retval)
Return retval;
Retval = security_sb_mount (dev_name, & nd, type_page, flags, data_page );
If (retval)
Goto dput_out;
/* If the ms_remount flag is specified, the purpose is to change the installation flag of the super block object s_flags field,
* And the installation file system identifier of the mnt_flags field of the file system object that has been installed. The do_remount () function executes these changes. */
If (flags & ms_remount)
Retval = do_remount (& nd, flags &~ Ms_remount, mnt_flags,
Data_page );
/* Otherwise, check the ms_bind flag. If it is specified, the file or directory on another installation point in the system directory tree must be visible. */
Else if (flags & ms_bind)
Retval = do_loopback (& nd, dev_name, flags & ms_rec );
Else if (flags & (ms_shared | ms_private | ms_slave | ms_unbindable ))
Retval = do_change_type (& nd, flags );
/* Otherwise, check the ms_move flag. If it is specified, you need to change the Installation Point of the installed file system. The do_move_mount () function completes this task atomically. */
Else if (flags & ms_move)
Retval = do_move_mount (& nd, dev_name );
/* Otherwise, call do_new_mount (). This is the most common case.
* This function is triggered when you require the installation of a special file system or a common file system stored in a disk partition. */
Else
Retval = do_new_mount (& nd, type_page, flags, mnt_flags,
Dev_name, data_page );
Dput_out:
Path_release (& Nd );
Return retval;
}
Let's look at the most common situation:
Static int do_new_mount (struct nameidata * nd, char * type, int flags,
Int mnt_flags, char * Name, void * Data)
{
Struct vfsmount * MNT;
If (! Type |! Memchr (type, 0, page_size ))
Return-einval;
/* We need capabilities ...*/
If (! Capable (cap_sys_admin ))
Return-eperm;
Mnt = do_kern_mount (type, flags, name, data );
If (is_err (mnt ))
Return ptr_err (mnt );
Return do_add_mount (MNT, Nd, mnt_flags, null );
}
do_new_mount() calls do_kern_mount(), passing it the file system type, the mount flags, and the block device name.
do_kern_mount() performs the actual mount operation and returns the address of a new mounted-filesystem descriptor (a struct vfsmount):
Struct vfsmount *
Do_kern_mount (const char * fstype, int flags, const char * Name, void * Data)
{
Struct file_system_type * type = get_fs_type (fstype);/* The get_fs_type function we are familiar with. Please refer to the previous blog article */
Struct vfsmount * MNT;
If (! Type)
Return err_ptr (-enodev );
Mnt = vfs_kern_mount (type, flags, name, data );
Put_filesystem (type );
Return MNT;
}
Struct vfsmount *
Vfs_kern_mount (struct file_system_type * type, int flags, const char * Name, void * Data)
{
Struct vfsmount * MNT;
Char * secdata = NULL;
Int error;
If (! Type)
Return err_ptr (-enodev );
Error =-enomem;
Mnt = alloc_vfsmnt (name);/* Haha, we are familiar with the alloc_vfsmnt function. Please refer to the previous blog */
If (! MNT)
Goto out;
If (data ){
Secdata = alloc_secdata ();
If (! Secdata)
Goto out_mnt;
Error = security_sb_copy_data (type, Data, secdata );
If (error)
Goto out_free_secdata;
}
/* Call the type-> get_sb () function allocation dependent on the file system, and initialize a new super to MNT-> mnt_sb */
Error = type-> get_sb (type, flags, name, Data, MNT );
If (error <0)
Goto out_free_secdata;
Error = security_sb_kern_mount (MNT-> mnt_sb, secdata );
If (error)
Goto out_sb;
/* Initialize the MNT-> mnt_root field to the address of the Directory item object corresponding to the root directory of the file system,
And add the reference counter value of the Directory item object. */
MNT-> mnt_mountpoint = mnt-> mnt_root;
/* Use the value in MNT to initialize the MNT-> mnt_parent field (for common file systems,
* When the graft_tree () mentioned later inserts the descriptor of the installed file system into a suitable linked list,
* Set the mnt_parent field to an appropriate value ). */
MNT-> mnt_parent = mnt;
Up_write (& MNT-> mnt_sb-> s_umount );
Free_secdata (secdata );
Return MNT;
Out_sb:
Dput (MNT-> mnt_root );
Up_write (& MNT-> mnt_sb-> s_umount );
Deactivate_super (MNT-> mnt_sb );
Out_free_secdata:
Free_secdata (secdata );
Out_mnt:
Free_vfsmnt (mnt );
Out:
Return err_ptr (error );
}
Then, the do_new_mount () function calls do_add_mount ():
Int do_add_mount (struct vfsmount * newmnt, struct nameidata * nd,
Int mnt_flags, struct list_head * fslist)
{
Int err;
Down_write (& namespace_sem );
/* Something was mounted here while we slept */
While (d_mountpoint (Nd-> dentry) & follow_down (& Nd-> MNT, & Nd-> dentry ))
;
Err =-einval;
If (! Check_mnt (Nd-> MNT ))
Goto unlock;
/* Refuse the same filesystem on the same mount point */
Err =-ebusy;
If (Nd-> MNT-> mnt_sb = newmnt-> mnt_sb &&
Nd-> MNT-> mnt_root = Nd-> dentry)
Goto unlock;
Err =-einval;
If (s_islnk (newmnt-> mnt_root-> d_inode-> I _mode ))
Goto unlock;
Newmnt-> mnt_flags = mnt_flags;
If (ERR = graft_tree (newmnt, Nd )))
Goto unlock;
If (fslist ){
/* Add to the specified expiration list */
Spin_lock (& vfsmount_lock );
List_add_tail (& newmnt-> mnt_expire, fslist );
Spin_unlock (& vfsmount_lock );
}
Up_write (& namespace_sem );
Return 0;
Unlock:
Up_write (& namespace_sem );
Mntput (newmnt );
Return err;
}
Essentially, it performs the following operations:
1. Acquire the namespace_sem read/write semaphore for writing, because the function is going to modify the namespace structure.
2. do_kern_mount() may have put the current process to sleep; in the meantime another process may have mounted a file system on the very same mount point, or even changed the root file system (current->namespace->root). So follow any mount that now sits on the mount point, and verify that the mount found there still belongs to the current namespace. If it does not, release the read/write semaphore and return an error code.
3. If the file system to be mounted is already mounted on the mount point specified by the system call's parameters, or if the root of the new file system is a symbolic link, release the read/write semaphore and return an error code.
4. Initialize the mnt_flags field of the new mounted-filesystem object allocated by do_kern_mount().
5. Call graft_tree() to insert the new mounted-filesystem object into the namespace list, the hash table, and the children list of the parent mounted file system (graft_tree() does this by calling attach_recursive_mnt()).
6. Release the namespace_sem read/write semaphore and return.
Static int graft_tree (struct vfsmount * MNT, struct nameidata * Nd)
{
Int err;
If (MNT-> mnt_sb-> s_flags & ms_nouser)
Return-einval;
If (s_isdir (Nd-> dentry-> d_inode-> I _mode )! =
S_isdir (MNT-> mnt_root-> d_inode-> I _mode ))
Return-enotdir;
Err =-enoent;
Mutex_lock (& Nd-> dentry-> d_inode-> I _mutex );
If (is_deaddir (Nd-> dentry-> d_inode ))
Goto out_unlock;
Err = security_sb_check_sb (MNT, Nd );
If (ERR)
Goto out_unlock;
Err =-enoent;
If (is_root (Nd-> dentry) |! D_unhashed (Nd-> dentry ))
Err = attach_recursive_mnt (MNT, Nd, null );
Out_unlock:
Mutex_unlock (& Nd-> dentry-> d_inode-> I _mutex );
If (! Err)
Security_sb_post_addmount (MNT, Nd );
Return err;
Back in do_mount(), path_release() is called to finish the path lookup of the mount point (see "path name lookup" in the next post), and the function returns.
2. Allocate a super block object
The get_sb method of a file system type object is usually implemented by a one-line function. For example, in the ext2 file system the method is implemented as follows:
// Fs/ext2/super. c
Static int ext2_get_sb (struct file_system_type * fs_type,
Int flags, const char * dev_name, void * data, struct vfsmount * MNT)
{
Return get_sb_bdev (fs_type, flags, dev_name, Data, ext2_fill_super, MNT );
}
The get_sb_bdev() VFS function allocates and initializes a new superblock suitable for a disk-based file system. It receives the address of the ext2_fill_super() function, which reads the superblock from the ext2 disk partition.
To allocate superblocks suitable for special file systems, VFS also provides get_sb_pseudo() (for special file systems with no mount point, such as pipefs), get_sb_single() (for special file systems with a single mount point, such as sysfs), and get_sb_nodev() (for special file systems that can be mounted several times, such as tmpfs; see below).
The get_sb_bdev() function is located in fs/super.c. Its code is as follows:
Int get_sb_bdev (struct file_system_type * fs_type,
Int flags, const char * dev_name, void * data,
INT (* fill_super) (struct super_block *, void *, INT ),
Struct vfsmount * MNT)
{
Struct block_device * bdev;
Struct super_block * s;
Int error = 0;
/* Call open_bdev_excl () to open the block device named dev_name. */
Bdev = open_bdev_excl (dev_name, flags, fs_type );
If (is_err (bdev ))
Return ptr_err (bdev );
/*
* Once the super is inserted into the list by sget, s_umount
* Will protect the lockfs code from trying to start a snapshot
* While we are mounting
*/
Down (& bdev-> bd_mount_sem );
/* Call sget () to search for the super block object linked list of the file system (Type-> fs_supers, see "file system installation data structure" in the previous blog ).
* If a super block related to the block device is found, its address is returned. Otherwise, assign and initialize a new super block object,
* Insert it into the file system linked list and the super block global linked list, and return its address. */
S = sget (fs_type, test_bdev_super, set_bdev_super, bdev );
Up (& bdev-> bd_mount_sem );
If (is_err (s ))
Goto error_s;
If (S-> s_root) {/* if it is not a new super block (note that it is determined by S-> s_root whether it is null )*/
If (flags ^ s-> s_flags) & ms_rdonly ){
Up_write (& S-> s_umount );
Deactivate_super (s );
Error =-ebusy;
Goto error_bdev;
}
Close_bdev_excl (bdev );
} Else {
Char B [bdevname_size];
/* Copy the value in the flags parameter to the s_flags field of the super block,
* Set the s_id, s_old_blocksize, and s_blocksize fields as appropriate values for Block devices. */
S-> s_flags = flags;
Strlcpy (S-> s_id, bdevname (bdev, B), sizeof (S-> s_id ));
Sb_set_blocksize (S, block_size (bdev ));
/* Call functions dependent on the file system (in this example, the ext2_fill_super function is called. We will talk about it when discussing ext2)
* Access the super block information on the disk and fill in other fields of the new super block object. */
Error = fill_super (S, Data, flags & ms_silent? 1: 0 );
If (error ){
Up_write (& S-> s_umount );
Deactivate_super (s );
Goto error;
}
S-> s_flags | = ms_active;
Bdev_uevent (bdev, kobj_mount );
}
Return simple_set_mnt (MNT, S );
Error_s:
Error = ptr_err (s );
Error_bdev:
Close_bdev_excl (bdev );
Error:
Return Error;
}
3. Install the root file system
Installing the root file system is a key part of system initialization. This is a complicated process, because the Linux kernel allows the root file system to be stored in many different places, such as hard disk partitions, floppy disks, and remote file systems shared through NFS, even stored in ramdisk (a virtual block device in Ram ).
To keep the description simple, let's assume that the root file system is stored in a hard disk partition (the most common case, after all). While the system boots, the kernel finds the major number of the disk that contains the root file system in the ROOT_DEV variable:
// Init/do_mounts.c
Dev_t root_dev;
When the kernel is compiled or an appropriate "root" option is passed to the initial boot loader, the root file system can be specified as a device file under the/dev directory. Similarly, the installation mark of the root file system is stored in the root_mountflags variable:
// Init/do_mounts.c
Int root_mountflags = ms_rdonly | ms_silent;
The user specifies these flags either by running the external rdev program on a compiled kernel image, or by passing a suitable rootflags option to the initial boot loader.
Install the root file system in two phases:
(1) The kernel installs a special rootfs file system, which only provides an empty directory as the initial installation point.
(2) install the actual root file system in an empty directory.
Why does the kernel bother to mount the rootfs file system before the real root file system? Because rootfs lets the kernel change the real root file system easily. In fact, in most cases the kernel mounts and unmounts several root file systems, one after another, during system initialization. For example, the boot disc of a distribution may load into RAM a kernel with a minimal set of drivers, which mounts as root a minimal file system stored in a ramdisk. Next, the programs in this initial root file system probe the system's hardware (for example, they determine whether the hard disk is EIDE, SCSI, and so on), load all the necessary kernel modules, and remount the root file system from a physical block device.
Phase 1: Install the rootfs File System
The first stage is completed by the init_rootfs () and init_mount_tree () functions, which are executed during system initialization.
The init_rootfs () function registers the special file system type rootfs:
Static struct file_system_type rootfs_fs_type = {
. Name = "rootfs ",
. Get_sb = rootfs_get_sb,
. Kill_sb = kill_litter_super,
};
The init_mount_tree () function initializes the root file system:
Static void _ init init_mount_tree (void)
{
Struct vfsmount * MNT;
Struct namespace * namespace;
Struct task_struct * g, * P;
Mnt = do_kern_mount ("rootfs", 0, "rootfs", null );
If (is_err (mnt ))
Panic ("can't create rootfs ");
Namespace = kmalloc (sizeof (* namespace), gfp_kernel );
If (! Namespace)
Panic ("can't allocate initial namespace ");
Atomic_set (& namespace-> count, 1 );
Init_list_head (& namespace-> list );
Init_waitqueue_head (& namespace-> poll );
Namespace-> event = 0;
List_add (& MNT-> mnt_list, & namespace-> list );
Namespace-> root = mnt;
MNT-> mnt_namespace = namespace;
Init_task.namespace = namespace;
Read_lock (& tasklist_lock );
Do_each_thread (G, p ){
Get_namespace (namespace );
P-> namespace = namespace;
} While_each_thread (g, P );
Read_unlock (& tasklist_lock );
Set_fs_pwd (current-> FS, namespace-> root, namespace-> root-> mnt_root );
Set_fs_root (current-> FS, namespace-> root, namespace-> root-> mnt_root );
}
As you can see, init_mount_tree first calls the do_kern_mount () function and passes the string "rootfs" to it as a file system type parameter. The file system flag is 0 and there is no data, save the address of the newly installed file system descriptor returned by the function in the MNT local variable. As described earlier, do_kern_mount () finally calls the get_sb method of the rootfs file system, that is, the rootfs_get_sb () function:
Static int rootfs_get_sb (struct file_system_type * fs_type,
Int flags, const char * dev_name, void * data, struct vfsmount * MNT)
{
Return get_sb_nodev (fs_type, flags | ms_nouser, Data, ramfs_fill_super,
MNT );
}
The get_sb_nodev () function previously mentioned for the rootfs File System:
1. Call the sget() function to allocate a new superblock, passing the address of the set_anon_super() function as a parameter. As a result, the s_dev field of the superblock is set appropriately: the major number is 0, and the minor number differs from those of the other mounted special file systems.
2. Copy the value of the flags parameter to the s_flags field of the super block.
3. Call the ramfs_fill_super() function to allocate an inode object and the corresponding dentry object, and to fill in the superblock fields. Because rootfs is a special file system with no on-disk superblock, only a few superblock fields need to be set:
Static int ramfs_fill_super (struct super_block * Sb, void * data, int silent)
{
Struct inode * inode;
Struct dentry * root;
Sb-> s_maxbytes = max_lfs_filesize;
Sb-> s_blocksize = page_cache_size;
Sb-> s_blocksize_bits = page_cache_shift;
Sb-> s_magic = ramfs_magic;
Sb-> s_op = & ramfs_ops;
Sb-> s_time_gran = 1;
Inode = ramfs_get_inode (SB, s_ifdir | 0755, 0 );
If (! Inode)
Return-enomem;
Root = d_alloc_root (inode );
If (! Root ){
Iput (inode );
Return-enomem;
}
Sb-> s_root = root;
Return 0;
}
4. Return the address of the new superblock.
Return to the init_mount_tree () function and continue:
Allocate a namespace object for the namespace of process 0, and insert into it the mounted-filesystem descriptor returned by do_kern_mount():
Namespace = kmalloc (sizeof (* namespace), gfp_kernel );
List_add (& MNT-> mnt_list, & namespace-> list );
Namespace-> root = mnt;
MNT-> mnt_namespace = init_task.namespace = namespace;
Set the namespace field of each other process in the system to the address of the namespace object, and initialize the reference counter namespace-> count (by default, all processes share the same initial namespace ).
Set the root directory and current working directory of process 0 to the root file system.
/* Excerpt from init_mount_tree(): cwd and root of the current task. */
set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
Phase 2: Install the actual root file system
The second phase of the root file system installation operation is performed by the kernel at the end of system initialization. Based on the options selected during kernel compilation and the startup options passed by the kernel loader, you can install the actual root file system in several ways. For the sake of simplicity, we only consider the disk file system. Its device file name has been passed to the kernel through the "root" Startup parameter. In addition to the rootfs file system, we assume that no initial special file system is used.
The kernel mainly calls the prepare_namespace () function to install the actual root file system:
Void _ init prepare_namespace (void)
{
Int is_floppy;
If (root_delay ){
Printk (kern_info "Waiting % DSEC before mounting root device.../N ",
Root_delay );
Ssleep (root_delay );
}
Md_run_setup ();
If (saved_root_name [0]) {
/* Set the root_device_name variable to the name of the device file obtained from the startup parameter "root.
* Similarly, set the root_dev variable to the Primary and Secondary device numbers of the same device file. */
Root_device_name = saved_root_name;
If (! Strncmp (root_device_name, "MTD", 3 )){
/* Call the mount_block_root () function to use the most commonly used block device as the sub-File System of the rootfs File System */
Mount_block_root (root_device_name, root_mountflags );
Goto out;
}
Root_dev = name_to_dev_t (root_device_name );
If (strncmp (root_device_name, "/dev/", 5) = 0)
Root_device_name + = 5;
}
Is_floppy = major (root_dev) = floppy_major;
If (initrd_load ())
Goto out;
If (is_floppy & rd_doload & rd_load_disk (0 ))
Root_dev = root_ram0;
Mount_root ();
Out:
/* Move the Installation Point of the installed file system in the root directory of the rootfs file system. */
Sys_mount (".", "/", null, ms_move, null );
Sys_chroot (".");
Security_sb_post_mountroot ();
}
Note that the special rootfs file system is not unmounted: it is merely hidden underneath the disk-based root file system.
4. Uninstall the File System
The umount() system call is used to unmount a file system. Its code is relatively simple, so we will not discuss it in detail. The corresponding sys_umount() service routine takes two parameters: a file name (usually a mount point directory or a block device file name) and a set of flags. The function performs the following operations:
1. Call path_lookup () to find the Installation Point path name. This function stores the returned search operation results in the nameidata type local variable nd.
2. If the directory found is not the mount point of a file system, set the retval return code to -EINVAL and jump to step 6. This check is done by verifying that nd->mnt->mnt_root contains the address of the dentry object pointed to by nd.dentry.
3. If the file system to be unmounted is not mounted in the namespace, set the retval return code to -EINVAL and jump to step 6 (recall that some special file systems have no mount point). This check is done by calling the check_mnt() function on nd->mnt.
4. If the user does not have the privileges required to unmount the file system, set the retval return code to -EPERM and jump to step 6.
5. Call do_umount () and pass the parameters to it as Nd. MNT (File System Object installed) and flags (a set of flags ). This function performs the following operations:
A) retrieve the address of the super block Sb from the mnt_sb field of the installed file system object.
B) If the user asked for a forced unmount, call the umount_begin superblock operation to interrupt any ongoing mount operation.
C) If the file system to be unmounted is the root file system and the user did not ask to actually detach it, call do_remount_sb() to remount the root file system read-only, and terminate.
D) Acquire the current process's namespace->sem read/write semaphore for writing, and take the vfsmount_lock spin lock.
E) If the mounted file system does not contain mount points for any child mounted file system, or if the user asked to forcibly detach the file system, call umount_tree() to unmount the file system (together with all its child file systems).
F) release the vfsmount_lock spin lock and the namespace of the current process-> SEM read/write semaphore.
6. Reduce the reference counter values of the Directory item objects and installed file system descriptors in the root directory of the corresponding file system. These counter values are increased by path_lookup.
7. Return the value of retval.