標籤:pipe ipc
Linux pipe 源碼分析
管道(pipe)是Unix中歷史最悠久的IPC機制,存在於各個版本的Unix中,主要用於父子進程之間的通訊(透過fork,子進程會獲得父進程的開啟檔案表)。pipe()系統調用的底層實現相當於一個特殊的檔案系統:每次調用時建立一個inode,並關聯兩個file,一個用於讀,一個用於寫,從而實現資料的單向流動。
使用者層API:
/* Basic form: pipefd receives the two descriptors of the new pipe. */
#include <unistd.h>

int pipe(int pipefd[2]);

/* Extended form: same as pipe() with an additional flags argument. */
#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <unistd.h>

int pipe2(int pipefd[2], int flags);
核心源碼路徑如下:
// sys_pipe(.......)
/*
 * pipe(2) entry point: simply forwards to pipe2 with flags == 0.
 */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return sys_pipe2(fildes, 0);
}

/*
 * pipe2(2) entry point.
 *
 * Builds the two struct file objects and reserves two descriptors via
 * __do_pipe_flags(), then copies the descriptor pair to the user buffer
 * `fildes`. Returns 0 on success, the error from __do_pipe_flags(), or
 * -EFAULT when the copy to user space fails.
 */
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	struct file *files[2];
	int fd[2];
	int error;

	// The core of the work is done by __do_pipe_flags()
	error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		// Everything is set up: copy the two fds associated with the pipe to user space
		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
			// Copy failed: drop both files and give back both reserved fds
			fput(files[0]);
			fput(files[1]);
			put_unused_fd(fd[0]);
			put_unused_fd(fd[1]);
			error = -EFAULT;
		} else {
			// Record the fd -> file mapping in this process's file descriptor table (fdtable)
			fd_install(fd[0], files[0]);
			fd_install(fd[1], files[1]);
		}
	}
	return error;
}

/*
 * Create the pipe's two files and reserve two descriptors for them.
 *
 * On success stores the read descriptor in fd[0] and the write descriptor
 * in fd[1], fills files[0]/files[1], and returns 0. Returns -EINVAL when
 * `flags` contains anything other than O_CLOEXEC, O_NONBLOCK or O_DIRECT;
 * otherwise returns the error of the step that failed. The descriptors are
 * only reserved here; the caller is the one that calls fd_install().
 */
static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
	int error;
	int fdw, fdr;

	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
		return -EINVAL;

	// Create the two struct file objects for this pipe
	error = create_pipe_files(files, flags);
	if (error)
		return error;

	// Obtain two usable file descriptors
	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_fdr;
	fdw = error;

	audit_fd_pair(fdr, fdw);
	fd[0] = fdr;
	fd[1] = fdw;
	return 0;

	// Unwind in reverse order: release the read fd (if reserved), then both files
err_fdr:
	put_unused_fd(fdr);
err_read_pipe:
	fput(files[0]);
	fput(files[1]);
	return error;
}

/**
 * Create the two file instances for a pipe.
 *
 * On success res[0] is the read end (FMODE_READ / O_RDONLY) and res[1] is
 * the write end (FMODE_WRITE / O_WRONLY); both have private_data pointing
 * at the same inode->i_pipe. Only O_NONBLOCK is carried over to the read
 * end's f_flags, while the write end also keeps O_DIRECT.
 * Returns 0 on success, -ENFILE or -ENOMEM on failure.
 */
int create_pipe_files(struct file **res, int flags)
{
	int err;
	// Create an inode for the pipe and perform some initialization on it
	struct inode *inode = get_pipe_inode();
	struct file *f;
	struct path path;
	static struct qstr name = { .name = "" }; // qstr: presumably "quick string" -- TODO confirm

	if (!inode)
		return -ENFILE;

	err = -ENOMEM;
	// Allocate a directory entry (dentry)
	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
	if (!path.dentry)
		goto err_inode;
	path.mnt = mntget(pipe_mnt); // reference count + 1

	d_instantiate(path.dentry, inode);

	err = -ENFILE;
	// Write end is allocated first and kept in `f` until the read end succeeds
	f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
	if (IS_ERR(f))
		goto err_dentry;

	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
	f->private_data = inode->i_pipe;

	// This is why fd[0] is the read end and fd[1] is the write end
	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
	if (IS_ERR(res[0]))
		goto err_file;

	// NOTE(review): presumably the extra path reference is for the second file sharing `path` -- confirm
	path_get(&path);
	res[0]->private_data = inode->i_pipe;
	res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
	res[1] = f;
	return 0;

err_file:
	put_filp(f);
err_dentry:
	// NOTE(review): no iput() on this path; presumably path_put() releases the inode via the dentry -- confirm
	free_pipe_info(inode->i_pipe);
	path_put(&path);
	return err;

err_inode:
	free_pipe_info(inode->i_pipe);
	iput(inode);
	return err;
}

/*
 * Allocate and initialize the inode backing a new pipe.
 *
 * Attaches a freshly allocated pipe_inode_info with files = 2 and one
 * reader and one writer, and sets the FIFO mode, owner and timestamps.
 * Returns the inode, or NULL if either allocation fails.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	// Assign an inode number
	inode->i_ino = get_next_ino();

	// Allocate the kernel-level object that represents the pipe
	pipe = alloc_pipe_info();
	if (!pipe)
		goto fail_iput;

	inode->i_pipe = pipe;
	pipe->files = 2;
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &pipefifo_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

// File operations instance used for pipes
const struct file_operations pipefifo_fops = {
	.open = fifo_open,
	.llseek = no_llseek,
	.read = new_sync_read,
	.read_iter = pipe_read,
	.write = new_sync_write,
	.write_iter = pipe_write,
	.poll = pipe_poll,
	.unlocked_ioctl = pipe_ioctl,
	.release = pipe_release,
	.fasync = pipe_fasync,
};
整體的邏輯圖可以這樣:
TODO:具體讀寫的實現細節new_sync_read/write()有待分析。
參考:(1)Linux kernel 3.18 source code (2)Linux man page(3)Linux核心源碼情景分析
Linux pipe 源碼分析