文章目錄
虛擬檔案系統(VFS)為使用者空間提供了檔案系統相關的介面,使用者程式可以通過標準的Unix檔案系統調用對不同介質上的不同檔案系統進行讀寫操作。
通用檔案系統介面
VFS使得使用者可以直接使用open()、read()和write()而無需考慮具體的檔案系統和實際物理介質。標準系統調用也可以在不同的介質和檔案系統之間執行,VFS負責這種不同介質和不同檔案系統之間的協調,並對上提供一種通用的存取方法。
之所以這種通用介面對所有類型的檔案系統都可以操作,是因為核心在它的底層檔案系統之上建立了一個抽象層。這個抽象層提供了一個通用檔案系統模型,支援各種檔案系統。VFS定義了所有檔案系統都支援的基本資料結構和介面,而實際檔案系統都實現了這些基本介面。由於實際檔案系統的代碼在統一的介面和資料結構下隱藏了實現的細節,所以在VFS層和核心的其他部分來看,所有的檔案系統都是相同的。
VFS中有四個主要的物件模型,分別是:
- 超級塊對象:代表一個已安裝的檔案系統;
- 索引節點對象:代表一個檔案;
- 目錄項對象:代表路徑的一個組成部分;
- 檔案對象:代表由進程打開的檔案,注意目錄也是檔案。
每種物件模型核心都定義了對應的操作對象,描述了核心針對該對象可以使用的方法。
超級塊對象
每種檔案系統都必須實現超級塊,用於儲存特定檔案系統的資訊,通常對應於存放在磁碟特定扇區中的檔案系統超級塊或檔案系統控制塊。對於非基於磁碟的檔案系統,會在使用現場建立超級塊並儲存在記憶體中。
超級塊用struct super_block結構體表示:
1400struct super_block {1401 struct list_head s_list; /* Keep this first 指向超級塊鏈表的指標 */1402 dev_t s_dev; /* search index; _not_ kdev_t 裝置標誌符 */1403 unsigned char s_dirt; /* 修改(髒)標誌 */1404 unsigned char s_blocksize_bits; /* 塊大小 單位bits */1405 unsigned long s_blocksize; /* 塊大小 單位Bytes*/1406 loff_t s_maxbytes; /* Max file size */1407 struct file_system_type *s_type;1408 const struct super_operations *s_op; /× 超級塊方法 ×/1409 const struct dquot_operations *dq_op; /× 磁碟限額方法 ×/1410 const struct quotactl_ops *s_qcop; /× 限額控制方法 ×/1411 const struct export_operations *s_export_op; /× 匯出方法 ×/1412 unsigned long s_flags;1413 unsigned long s_magic;1414 struct dentry *s_root;1415 struct rw_semaphore s_umount;1416 struct mutex s_lock;1417 int s_count;1418 atomic_t s_active;1419#ifdef CONFIG_SECURITY1420 void *s_security;1421#endif1422 const struct xattr_handler **s_xattr;14231424 struct list_head s_inodes; /* all inodes */1425 struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */1426#ifdef CONFIG_SMP1427 struct list_head __percpu *s_files;1428#else1429 struct list_head s_files;1430#endif1431 /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */1432 struct list_head s_dentry_lru; /* unused dentry lru */1433 int s_nr_dentry_unused; /* # of dentry on lru */14341435 /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */1436 spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp;1437 struct list_head s_inode_lru; /* unused inode lru */1438 int s_nr_inodes_unused; /* # of inodes on lru */14391440 struct block_device *s_bdev;1441 struct backing_dev_info *s_bdi;1442 struct mtd_info *s_mtd;1443 struct list_head s_instances;1444 struct quota_info s_dquot; /* Diskquota specific options */14451446 int s_frozen;1447 wait_queue_head_t s_wait_unfrozen;14481449 char s_id[32]; /* Informational name */1450 u8 s_uuid[16]; /* UUID */14511452 void *s_fs_info; /* Filesystem private info */1453 fmode_t s_mode;14541455 /* Granularity of c/m/atime in ns.1456 
Cannot be worse than a second */1457 u32 s_time_gran;14581459 /*1460 * The next field is for VFS *only*. No filesystems have any business1461 * even looking at it. You had been warned.1462 */1463 struct mutex s_vfs_rename_mutex; /* Kludge */14641465 /*1466 * Filesystem subtype. If non-empty the filesystem type field1467 * in /proc/mounts will be "type.subtype"1468 */1469 char *s_subtype;14701471 /*1472 * Saved mount options for lazy filesystems using1473 * generic_show_options()1474 */1475 char __rcu *s_options;1476 const struct dentry_operations *s_d_op; /* default d_op for dentries */14771478 /*1479 * Saved pool identifier for cleancache (-1 means none)1480 */1481 int cleancache_poolid;14821483 struct shrinker s_shrink; /* per-sb shrinker handle */1484};
超級塊對象中的s_op定義了超級塊的操作函數表,用super_operations結構體表示,其中的每一項都定義了一種操作的函數指標:
1658struct super_operations {1659 struct inode *(*alloc_inode)(struct super_block *sb);1660 void (*destroy_inode)(struct inode *);16611662 void (*dirty_inode) (struct inode *, int flags);1663 int (*write_inode) (struct inode *, struct writeback_control *wbc);1664 int (*drop_inode) (struct inode *);1665 void (*evict_inode) (struct inode *);1666 void (*put_super) (struct super_block *);1667 void (*write_super) (struct super_block *);1668 int (*sync_fs)(struct super_block *sb, int wait);1669 int (*freeze_fs) (struct super_block *);1670 int (*unfreeze_fs) (struct super_block *);1671 int (*statfs) (struct dentry *, struct kstatfs *);1672 int (*remount_fs) (struct super_block *, int *, char *);1673 void (*umount_begin) (struct super_block *);16741675 int (*show_options)(struct seq_file *, struct vfsmount *);1676 int (*show_devname)(struct seq_file *, struct vfsmount *);1677 int (*show_path)(struct seq_file *, struct vfsmount *);1678 int (*show_stats)(struct seq_file *, struct vfsmount *);1679#ifdef CONFIG_QUOTA1680 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);1681 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);1682#endif1683 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);1684 int (*nr_cached_objects)(struct super_block *);1685 void (*free_cached_objects)(struct super_block *, int);1686};
具體的檔案系統會定義自己的super_operations結構體,在其中填入該檔案系統對各項操作的函數實現,並讓超級塊對象的s_op指標指向它。上述的操作函數表也不必全部實現,檔案系統可以將不需要的函數指標設定為NULL。另外,上述函數均在進程上下文中調用,必要時均可阻塞。
索引節點對象
索引節點對象包含了核心在操作檔案和目錄時需要的全部資訊,這些資訊可以從磁碟索引節點直接讀入。
索引節點使用inode結構體表示,一個索引節點代表了檔案系統中的一個檔案,它也可以是裝置或者管道這樣的特殊檔案。
744/* 745 * Keep mostly read-only and often accessed (especially for 746 * the RCU path lookup and 'stat' data) fields at the beginning 747 * of the 'struct inode' 748 */ 749struct inode { 750 umode_t i_mode; 751 unsigned short i_opflags; 752 uid_t i_uid; 753 gid_t i_gid; 754 unsigned int i_flags; 755 756#ifdef CONFIG_FS_POSIX_ACL 757 struct posix_acl *i_acl; 758 struct posix_acl *i_default_acl; 759#endif 760 761 const struct inode_operations *i_op; 762 struct super_block *i_sb; 763 struct address_space *i_mapping; 764 765#ifdef CONFIG_SECURITY 766 void *i_security; 767#endif 768 769 /* Stat data, not accessed from path walking */ 770 unsigned long i_ino; 771 /* 772 * Filesystems may only read i_nlink directly. They shall use the 773 * following functions for modification: 774 * 775 * (set|clear|inc|drop)_nlink 776 * inode_(inc|dec)_link_count 777 */ 778 union { 779 const unsigned int i_nlink; 780 unsigned int __i_nlink; 781 }; 782 dev_t i_rdev; 783 struct timespec i_atime; 784 struct timespec i_mtime; 785 struct timespec i_ctime; 786 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 787 unsigned short i_bytes; 788 blkcnt_t i_blocks; 789 loff_t i_size; 790 791#ifdef __NEED_I_SIZE_ORDERED 792 seqcount_t i_size_seqcount; 793#endif 794 795 /* Misc */ 796 unsigned long i_state; 797 struct mutex i_mutex; 798 799 unsigned long dirtied_when; /* jiffies of first dirtying */ 800 801 struct hlist_node i_hash; 802 struct list_head i_wb_list; /* backing dev IO list */ 803 struct list_head i_lru; /* inode LRU list */ 804 struct list_head i_sb_list; 805 union { 806 struct list_head i_dentry; 807 struct rcu_head i_rcu; 808 }; 809 atomic_t i_count; 810 unsigned int i_blkbits; 811 u64 i_version; 812 atomic_t i_dio_count; 813 atomic_t i_writecount; 814 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 815 struct file_lock *i_flock; 816 struct address_space i_data; 817#ifdef CONFIG_QUOTA 818 struct dquot *i_dquot[MAXQUOTAS]; 819#endif 820 struct 
list_head i_devices; 821 union { 822 struct pipe_inode_info *i_pipe; 823 struct block_device *i_bdev; 824 struct cdev *i_cdev; 825 }; 826 827 __u32 i_generation; 828 829#ifdef CONFIG_FSNOTIFY 830 __u32 i_fsnotify_mask; /* all events this inode cares about */ 831 struct hlist_head i_fsnotify_marks; 832#endif 833 834#ifdef CONFIG_IMA 835 atomic_t i_readcount; /* struct files open RO */ 836#endif 837 void *i_private; /* fs or device private pointer */ 838}; 839
其中,i_op定義了索引節點對象的所有操作方法:
1613struct inode_operations {
1614 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
1615 void * (*follow_link) (struct dentry *, struct nameidata *);
1616 int (*permission) (struct inode *, int);
1617 struct posix_acl * (*get_acl)(struct inode *, int);
1618
1619 int (*readlink) (struct dentry *, char __user *,int);
1620 void (*put_link) (struct dentry *, struct nameidata *, void *);
1621
1622 int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
1623 int (*link) (struct dentry *,struct inode *,struct dentry *);
1624 int (*unlink) (struct inode *,struct dentry *);
1625 int (*symlink) (struct inode *,struct dentry *,const char *);
1626 int (*mkdir) (struct inode *,struct dentry *,int);
1627 int (*rmdir) (struct inode *,struct dentry *);
1628 int (*mknod) (struct inode *,struct dentry *,int,dev_t);
1629 int (*rename) (struct inode *, struct dentry *,
1630 struct inode *, struct dentry *);
1631 void (*truncate) (struct inode *);
1632 int (*setattr) (struct dentry *, struct iattr *);
1633 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
1634 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
1635 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
1636 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1637 int (*removexattr) (struct dentry *, const char *);
1638 void (*truncate_range)(struct inode *, loff_t, loff_t);
1639 int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
1640 u64 len);