部落格已遷移至:http://kulv.sinaapp.com/,這裡不再使用
sys_umount系統調用的實現注釋。2.4版核心
/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */
/*
 * sys_umount - system call entry point for unmounting a filesystem.
 * @name:  path string, copied with getname() before use
 * @flags: passed unchanged to do_umount()
 *
 * Returns the result of do_umount(), or a negative errno produced here:
 * the getname()/path_walk() error, -EINVAL when the resolved dentry is
 * not the root dentry of the mount it was found on, or -EPERM when the
 * caller lacks CAP_SYS_ADMIN and is not the mount owner.
 * The whole call runs between lock_kernel() and unlock_kernel().
 */
asmlinkage long sys_umount(char * name, int flags)
{
	struct nameidata nd;
	char *kname;
	int retval;

	lock_kernel();
	kname = getname(name);
	retval = PTR_ERR(kname);
	if (IS_ERR(kname))
		goto out;
	retval = 0;
	/*
	 * Resolve the path into nd; afterwards nd.dentry and nd.mnt describe
	 * the target. path_walk() only runs when path_init() returns non-zero.
	 */
	if (path_init(kname, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
		retval = path_walk(kname, &nd);
	putname(kname);
	if (retval)
		goto out;
	retval = -EINVAL;
	/*
	 * nd.mnt is the vfsmount the path was found on and mnt_root is that
	 * mount's root dentry; anything other than the root is rejected.
	 */
	if (nd.dentry != nd.mnt->mnt_root)
		goto dput_and_out;

	retval = -EPERM;
	/* Caller must have CAP_SYS_ADMIN or be the recorded mount owner. */
	if (!capable(CAP_SYS_ADMIN) && current->uid!=nd.mnt->mnt_owner)
		goto dput_and_out;

	dput(nd.dentry);	/* drop the dentry reference taken by the lookup */
	/* puts nd.mnt */
	down(&mount_sem);
	retval = do_umount(nd.mnt, 0, flags);	/* the real work happens here */
	up(&mount_sem);
	goto out;
dput_and_out:
	path_release(&nd);
out:
	unlock_kernel();
	return retval;
}

/*
 * do_umount - detach one vfsmount; when no other instance of the
 * superblock remains, also shut the superblock down.
 * @mnt:         the vfsmount to remove; its superblock is mnt->mnt_sb
 * @umount_root: sys_umount() passes 0. When zero, a request on
 *               current->fs->rootmnt becomes a read-only remount.
 * @flags:       passed down from sys_umount(); only MNT_FORCE is
 *               examined here
 *
 * Returns 0 on success, -EBUSY when the mount or its root inode is still
 * in use, or the result of do_remount_sb() in the root special case.
 * Every return path drops one reference with mntput(mnt).
 */
static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
{
	struct super_block * sb = mnt->mnt_sb;	/* superblock behind this mount */

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * The problem being: we have to implement rootfs and GC for that
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (mnt == current->fs->rootmnt && !umount_root) {
		int retval = 0;	/* target is the current process's root mount */
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		mntput(mnt);
		/* Only remount when the superblock is not already read-only. */
		if (!(sb->s_flags & MS_RDONLY))
			retval = do_remount_sb(sb, MS_RDONLY, 0);
		return retval;
	}

	spin_lock(&dcache_lock);

	/*
	 * mnt_instances has distinct next and prev neighbours: per the
	 * original annotator this means the superblock is mounted more than
	 * once, so only this one instance is detached and the superblock
	 * itself is left alone.
	 */
	if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {
		/* More than two references: somebody else still uses the mount. */
		if (atomic_read(&mnt->mnt_count) > 2) {
			spin_unlock(&dcache_lock);
			mntput(mnt);	/* just drop our reference */
			return -EBUSY;
		}
		/* NOTE(review): the annotator reads FS_SINGLE as "one superblock
		 * shared by all mounts of this filesystem type" - confirm. */
		if (sb->s_type->fs_flags & FS_SINGLE)
			put_filesystem(sb->s_type);
		/* We hold two references, so mntput() is safe */
		mntput(mnt);
		/* Unlinks mnt from its lists, frees it and releases dcache_lock. */
		remove_vfsmnt(mnt);
		return 0;
	}
	spin_unlock(&dcache_lock);

	/* From here on this is handled as the only mount of the superblock. */
	/*
	 * Before checking whether the filesystem is still busy,
	 * make sure the kernel doesn't hold any quota files open
	 * on the device. If the umount fails, too bad -- there
	 * are no quotas running any more. Just turn them on again.
	 */
	DQUOT_OFF(sb);
	acct_auto_close(sb->s_dev);

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. Thats for the mount program to worry
	 * about for the moment.
	 */

	/* Forced unmount: let the filesystem prepare if it has a hook. */
	if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
		sb->s_op->umount_begin(sb);

	/*
	 * Shrink dcache, then fsync. This guarantees that if the
	 * filesystem is quiescent at this point, then (a) only the
	 * root entry should be in use and (b) that root entry is
	 * clean.
	 */
	shrink_dcache_sb(sb);	/* free this sb's dentries on dentry_unused */
	fsync_dev(sb->s_dev);	/* write out buffers, superblock and inodes */

	/* Any state bit still set on the root inode counts as busy. */
	if (sb->s_root->d_inode->i_state) {
		mntput(mnt);
		return -EBUSY;
	}

	/* Something might grab it again - redo checks */

	spin_lock(&dcache_lock);
	if (atomic_read(&mnt->mnt_count) > 2) {
		spin_unlock(&dcache_lock);
		mntput(mnt);
		return -EBUSY;
	}

	/* OK, that's the point of no return */
	mntput(mnt);
	/* Unlinks mnt from its lists, frees it and releases dcache_lock. */
	remove_vfsmnt(mnt);

	/*
	 * Final teardown of the superblock. NOTE(review): per the annotator,
	 * kill_super() notifies the filesystem through its super_operations
	 * and releases what was allocated for the superblock - not visible
	 * in this file, confirm.
	 */
	kill_super(sb, umount_root);
	return 0;
}

/*
 * Called with spinlock held, releases it.
 */
/*
 * remove_vfsmnt - unlink one vfsmount from every list it is on, drop the
 * references it holds and free it. Expects dcache_lock to be held on
 * entry and unlocks it after the list removals.
 */
static void remove_vfsmnt(struct vfsmount *mnt)
{
	/* First of all, remove it from all lists */
	/*
	 * NOTE(review): list roles as described by the original annotator;
	 * they cannot be verified from this file:
	 *   mnt_instances - the superblock's list of mounts
	 *   mnt_clash     - the mount-point dentry's list of mounts
	 *   mnt_list      - the global list of all vfsmounts
	 *   mnt_child     - the parent mount's list of children
	 */
	list_del(&mnt->mnt_instances);
	list_del(&mnt->mnt_clash);
	list_del(&mnt->mnt_list);
	list_del(&mnt->mnt_child);
	spin_unlock(&dcache_lock);
	/* Now we can work safely */
	/* A mount that is its own parent has no parent reference to drop
	 * (presumably the root of the mount tree). */
	if (mnt->mnt_parent != mnt)
		mntput(mnt->mnt_parent);

	dput(mnt->mnt_mountpoint);	/* dentry this mount was attached to */
	dput(mnt->mnt_root);		/* root dentry of the mounted tree */
	if (mnt->mnt_devname)
		kfree(mnt->mnt_devname);
	kfree(mnt);
}

/*
 * Shrink the dcache for the specified super block.
 * This allows us to unmount a device without disturbing
 * the dcache for the other devices.
 *
 * This implementation makes just two traversals of the
 * unused list. On the first pass we move the selected
 * dentries to the most recent end, and on the second
 * pass we free them. The second pass must restart after
 * each dput(), but since the target dentries are all at
 * the end, it's really just a single traversal.
 */

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This
 * is used to free the dcache before unmounting a file
 * system
 */
void shrink_dcache_sb(struct super_block * sb)
{
	struct list_head *tmp, *next;
	struct dentry *dentry;

	/*
	 * Pass one ... move the dentries for the specified
	 * superblock to the most recent end of the unused list.
	 */
	spin_lock(&dcache_lock);
	next = dentry_unused.next;	/* dentry_unused: global list of unused dentries */
	while (next != &dentry_unused) {
		tmp = next;
		next = tmp->next;	/* fetch the successor before tmp is moved */
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)	/* belongs to another superblock */
			continue;
		list_del(tmp);
		list_add(tmp, &dentry_unused);	/* re-insert at the head of the list */
	}

	/*
	 * Pass two ... free the dentries for this superblock.
	 */
repeat:
	next = dentry_unused.next;
	while (next != &dentry_unused) {
		tmp = next;
		next = tmp->next;
		dentry = list_entry(tmp, struct dentry, d_lru);
		/*
		 * NOTE(review): the original annotator asks whether the scan
		 * could stop here instead, since pass one grouped the matching
		 * dentries together - unverified.
		 */
		if (dentry->d_sb != sb)
			continue;
		if (atomic_read(&dentry->d_count))	/* still referenced */
			continue;
		dentry_stat.nr_unused--;
		list_del(tmp);
		INIT_LIST_HEAD(tmp);	/* leave d_lru as an empty, self-linked list */
		/* Frees the dentry; returns with dcache_lock held again. */
		prune_one_dentry(dentry);
		goto repeat;	/* restart the scan from the head */
	}
	spin_unlock(&dcache_lock);
}

/*
 * prune_one_dentry - unhash and free a single dentry, then drop the
 * reference it held on its parent.
 *
 * Re-acquires dcache_lock before returning. The matching unlock is not
 * visible in this function (presumably done inside dentry_iput() - TODO
 * confirm).
 */
static inline void prune_one_dentry(struct dentry * dentry)
{
	struct dentry * parent;

	list_del_init(&dentry->d_hash);
	list_del(&dentry->d_child);
	dentry_iput(dentry);
	parent = dentry->d_parent;	/* read before the dentry is freed */
	d_free(dentry);
	/* A dentry that is its own parent (presumably a root) has no
	 * parent reference to drop. */
	if (parent != dentry)
		dput(parent);
	spin_lock(&dcache_lock);
}

/*
 * fsync_dev - write out everything cached for one device.
 * @dev: device number; passed to each helper below, where 0 is treated
 *       as "all devices"
 *
 * Returns the result of the final, waiting sync_buffers() pass.
 */
int fsync_dev(kdev_t dev)
{
	/* First pass: wait == 0, so locked buffers are skipped, not waited on. */
	sync_buffers(dev, 0);

	lock_kernel();
	/* Calls write_super on dirty superblocks matching dev. */
	sync_supers(dev);
	sync_inodes(dev);
	DQUOT_SYNC(dev);
	unlock_kernel();

	/* Final pass: wait == 1, so this may sleep on locked buffers. */
	return sync_buffers(dev, 1);
}

/* Call sync_buffers with wait!=0 to ensure that the call does not
 * return until all buffer writes have completed. Sync() may return
 * before the writes have finished; fsync() may not.
 */

/* Godamity-damn.
Some buffers (bitmaps for filesystems)
 * spontaneously dirty themselves without ever brelse being called.
 * We will ultimately want to put these in a separate list, but for
 * now we search all of the lists for dirty buffers.
 */
/*
 * sync_buffers - write out dirty buffers, optionally waiting for them.
 * @dev:  device to restrict the scan to; 0 disables the filter so every
 *        buffer is considered
 * @wait: 0 = a single pass that skips locked buffers;
 *        non-zero = up to three passes, sleeping on locked buffers from
 *        the second pass on
 *
 * Returns 0, or -EIO if a waiting scan of the BUF_DIRTY list found a
 * requested, unlocked, clean buffer that is not uptodate.
 */
static int sync_buffers(kdev_t dev, int wait)
{
	int i, retry, pass = 0, err = 0;
	struct buffer_head * bh, *next;

	/* One pass for no-wait, three for wait:
	 * 0) write out all dirty, unlocked buffers;
	 * 1) write out all dirty buffers, waiting if locked;
	 * 2) wait for completion by waiting for all buffers to unlock.
	 */
	do {
		retry = 0;

		/* We search all lists as a failsafe mechanism, not because we expect
		 * there to be dirty buffers on any of the other lists.
		 */
repeat:
		spin_lock(&lru_list_lock);
		bh = lru_list[BUF_DIRTY];	/* head of the dirty list */
		if (!bh)
			goto repeat2;

		/* Bounded walk: at most twice the current dirty-buffer count. */
		for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
			next = bh->b_next_free;

			/* Re-checked on every iteration, not just before the loop. */
			if (!lru_list[BUF_DIRTY])
				break;
			/* dev == 0 means "all devices"; otherwise skip foreign buffers. */
			if (dev && bh->b_dev != dev)
				continue;
			if (buffer_locked(bh)) {
				/* Buffer is locked; skip it unless wait is
				 * requested AND pass > 0.
				 */
				if (!wait || !pass) {
					retry = 1;	/* remember a locked buffer was skipped */
					continue;
				}
				/* Hold a reference across the sleep, drop the list
				 * lock, then rescan from the top. */
				atomic_inc(&bh->b_count);
				spin_unlock(&lru_list_lock);
				wait_on_buffer (bh);
				atomic_dec(&bh->b_count);
				goto repeat;
			}

			/* From here on the buffer is not locked. */
			/* If an unlocked buffer is not uptodate, there has
			 * been an IO error. Skip it.
			 */
			if (wait && buffer_req(bh) && !buffer_locked(bh) &&
			    !buffer_dirty(bh) && !buffer_uptodate(bh)) {
				err = -EIO;
				continue;
			}

			/* Don't write clean buffers. Don't write ANY buffers
			 * on the third pass.
			 */
			if (!buffer_dirty(bh) || pass >= 2)
				continue;

			atomic_inc(&bh->b_count);
			spin_unlock(&lru_list_lock);
			ll_rw_block(WRITE, 1, &bh);	/* submit the write */
			atomic_dec(&bh->b_count);
			retry = 1;
			/* The list lock was dropped, so restart the scan rather
			 * than trust the saved 'next' pointer. */
			goto repeat;
		}

 repeat2:
		/* Second list: buffers on BUF_LOCKED. lru_list_lock is held here. */
		bh = lru_list[BUF_LOCKED];
		if (!bh) {
			spin_unlock(&lru_list_lock);
			break;	/* leaves the do/while loop entirely */
		}
		for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
			next = bh->b_next_free;

			if (!lru_list[BUF_LOCKED])
				break;
			if (dev && bh->b_dev != dev)
				continue;
			if (buffer_locked(bh)) {
				/* Buffer is locked; skip it unless wait is
				 * requested AND pass > 0.
				 */
				if (!wait || !pass) {
					retry = 1;
					continue;
				}
				atomic_inc(&bh->b_count);
				spin_unlock(&lru_list_lock);
				wait_on_buffer (bh);	/* may sleep until the buffer unlocks */
				/* Retake the lock first: repeat2 expects it held. */
				spin_lock(&lru_list_lock);
				atomic_dec(&bh->b_count);
				goto repeat2;	/* rescan after the lock was dropped */
			}
		}
		spin_unlock(&lru_list_lock);

		/* If we are waiting for the sync to succeed, and if any dirty
		 * blocks were written, then repeat; on the second pass, only
		 * wait for buffers being written (do not pass to write any
		 * more buffers on the second pass).
		 */
	} while (wait && retry && ++pass<=2);
	return err;
}

/*
 * Note: check the dirty flag before waiting, so we don't
 * hold up the sync while mounting a device.
(The newly
 * mounted device won't need syncing.)
 */
/*
 * sync_supers - call write_super on dirty superblocks.
 * @dev: device to restrict to; 0 means every superblock with a device
 *
 * Walks the global super_blocks list. Each candidate is re-checked after
 * lock_super() before its write_super operation, if any, is called.
 */
void sync_supers(kdev_t dev)
{
	struct super_block * sb;

	/* super_blocks is the global list of all superblocks. */
	for (sb = sb_entry(super_blocks.next);
	     sb != sb_entry(&super_blocks);
	     sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		if (dev && sb->s_dev != dev)	/* not the requested device */
			continue;
		if (!sb->s_dirt)
			continue;
		lock_super(sb);
		/* Same tests again, now under the superblock lock. */
		if (sb->s_dev && sb->s_dirt && (!dev || dev == sb->s_dev))
			if (sb->s_op && sb->s_op->write_super)
				sb->s_op->write_super(sb);
		/*
		 * Annotator's example: for ext2 the hook is ext2_write_super,
		 * installed through this operations table:
		 *
		 * static struct super_operations ext2_sops = {
		 *	read_inode:	ext2_read_inode,
		 *	write_inode:	ext2_write_inode,
		 *	put_inode:	ext2_put_inode,
		 *	delete_inode:	ext2_delete_inode,
		 *	put_super:	ext2_put_super,
		 *	write_super:	ext2_write_super,
		 *	statfs:		ext2_statfs,
		 *	remount_fs:	ext2_remount,
		 * };
		 */
		unlock_super(sb);
	}
}

/**
 * sync_inodes
 * @dev: device to sync the inodes from.
 *
 * sync_inodes goes through the super block's dirty list,
 * writes them out, and puts them back on the normal list.
 */

void sync_inodes(kdev_t dev)
{
	struct super_block * sb = sb_entry(super_blocks.next);

	/*
	 * Search the super_blocks array for the device(s) to sync.
	 */
	spin_lock(&inode_lock);
	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
		if (!sb->s_dev)
			continue;
		if (dev && sb->s_dev != dev)
			continue;
		/* Matching superblock: write back the inodes on its s_dirty list. */
		sync_list(&sb->s_dirty);

		/* A specific device was requested, so stop at the first match. */
		if (dev)
			break;
	}
	spin_unlock(&inode_lock);
}

/*
 * sync_list - run sync_one() on the tail entry of @head until the list
 * is empty. Called from sync_inodes() with inode_lock held.
 */
static inline void sync_list(struct list_head *head)
{
	struct list_head * tmp;

	/* Always take the tail; sync_one() moves a written inode off this list. */
	while ((tmp = head->prev) != head)
		sync_one(list_entry(tmp, struct inode, i_list), 0);
}

/*
 * sync_one - write back one inode, or wait if it is already locked.
 * @inode: inode to process
 * @sync:  forwarded to write_inode()
 *
 * Entered and left with inode_lock held; the lock is dropped around the
 * wait and around the write-out.
 */
static inline void sync_one(struct inode *inode, int sync)
{
	if (inode->i_state & I_LOCK) {
		/* Inode already has I_LOCK set: take a reference (presumably
		 * so it cannot go away while we sleep), wait, then drop it. */
		__iget(inode);
		spin_unlock(&inode_lock);
		__wait_on_inode(inode);
		iput(inode);
		spin_lock(&inode_lock);
	} else {
		unsigned dirty;

		/* Move the inode to the in-use or unused list, chosen by
		 * whether its reference count is non-zero. */
		list_del(&inode->i_list);
		list_add(&inode->i_list, atomic_read(&inode->i_count)
							? &inode_in_use
							: &inode_unused);
		/* Set I_LOCK, reset I_DIRTY */
		dirty = inode->i_state & I_DIRTY;	/* remember which dirty bits were set */
		inode->i_state |= I_LOCK;
		inode->i_state &= ~I_DIRTY;
		spin_unlock(&inode_lock);

		/* Start write-out of the inode's dirty pages (presumably
		 * those on its mapping's dirty list - TODO confirm). */
		filemap_fdatasync(inode->i_mapping);

		/* Don't write the inode if only I_DIRTY_PAGES was set */
		if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
			write_inode(inode, sync);

		/* Wait for page I/O on this mapping to finish (per the
		 * annotator, this waits on the locked pages - TODO confirm). */
		filemap_fdatawait(inode->i_mapping);

		spin_lock(&inode_lock);
		inode->i_state &= ~I_LOCK;
		wake_up(&inode->i_wait);	/* wake anyone sleeping on this inode */
	}
}