關於fuse使用者態檔案系統的文章有很多,比如http://my.debugman.net/program/fuse-180.html,就寫得很全面。但關於fuse使用者態、核心態通訊的文章還比較少,我現在發現的一篇是http://blog.chinaunix.net/uid-20687780-id-313603.html,主要講解了使用者態、核心態的通訊協定。
這裡主要分析一下fuse的核心態與使用者態之間的通訊機制。fuse的主要運行流程如下所示:
當使用者態程式執行了POSIX的檔案系統操作,經過glibc,變換為系統調用傳遞給vfs,vfs再將其傳給FUSE的核心模組,FUSE的核心模組根據系統調用的類型,將請求發送到使用者態的FUSE進程,並等待使用者態進程的應答。FUSE核心模組在收到應答後,將其發送給vfs,把最終運行結果呈現到使用者態程式。
那FUSE是如何讓使用者態與核心態通訊的呢?這個在原始碼中可以看得比較清楚。
首先,在核心代碼fs/fuse/dev.c中:
/* Misc device description used to expose the FUSE kernel module. */
static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name  = "fuse",	/* the resulting misc device appears as /dev/fuse */
	.fops  = &fuse_dev_operations,
};

/*
 * Initialise the FUSE device side: create the slab cache used for
 * struct fuse_req objects, then register the misc device.
 *
 * Returns 0 on success, -ENOMEM when the cache cannot be created, or the
 * error reported by misc_register() (the cache is destroyed again first).
 */
int __init fuse_dev_init(void)
{
	int rc;

	fuse_req_cachep = kmem_cache_create("fuse_request",
					    sizeof(struct fuse_req),
					    0, 0, NULL);
	if (!fuse_req_cachep)
		return -ENOMEM;

	/* Register as a misc device; misc devices use major number 10. */
	rc = misc_register(&fuse_miscdevice);
	if (rc) {
		/* Undo the cache creation before reporting the failure. */
		kmem_cache_destroy(fuse_req_cachep);
		return rc;
	}

	return 0;
}
通過調用fuse_dev_init函數,將會產生一個misc裝置(類似字元裝置,但主裝置號為10,並且會在/dev/目錄下根據裝置名稱自動產生裝置檔案),對應的裝置檔案為/dev/fuse。使用者態代碼通過open開啟這個裝置檔案,並且通過如下函數,註冊與fuse核心態通訊的函數:
struct fuse_chan *fuse_kern_chan_new(int fd){struct fuse_chan_ops op = {.receive = fuse_kern_chan_receive,.send = fuse_kern_chan_send,.destroy = fuse_kern_chan_destroy,};size_t bufsize = getpagesize() + 0x1000;bufsize = bufsize < MIN_BUFSIZE ? MIN_BUFSIZE : bufsize;return fuse_chan_new(&op, fd, bufsize, NULL);}
fuse_kern_chan_receive函數通過res = read(fuse_chan_fd(ch), buf, size);從/dev/fuse中讀取核心發來的請求,而fuse_kern_chan_send函數則通過ssize_t res = writev(fuse_chan_fd(ch), iov, count);將資料發送到核心模組。
再回到核心模組,還是在fs/fuse/dev.c檔案中,FUSE通過為/dev/fuse裝置檔案註冊以下操作回調,來支援使用者態對其的讀寫操作:
/* File operation callbacks registered for the FUSE device file. */
const struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,		/* seeking is not supported */
	.read		= do_sync_read,		/* use the generic synchronous read helper */
	.aio_read	= fuse_dev_read,	/* asynchronous read handler FUSE provides for user-space reads */
	.write		= do_sync_write,	/* use the generic synchronous write helper */
	.aio_write	= fuse_dev_write,	/* asynchronous write handler FUSE provides for user-space writes */
	.poll		= fuse_dev_poll,	/* check whether anything happened on the file; if not, sleep until it does */
	.release	= fuse_dev_release,	/* user space close()d the fd of this device file */
	.fasync		= fuse_dev_fasync,	/* enable or disable signal-based I/O event notification */
};
其中,do_sync_read中調用了ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos),同樣,do_sync_write函數中也調用了ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos),所以同步的read、write不用單獨實現,只需實現aio_read和aio_write即可。
在FUSE核心中,存在一個fuse_conn結構體,為使用者態、核心態通訊服務,其結構為:
/**
 * A Fuse connection.
 *
 * This structure is created, when the filesystem is mounted, and is
 * destroyed, when the client device is closed and the filesystem is
 * unmounted.
 */
struct fuse_conn {
	/** Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/** Mutex protecting against directory alias creation */
	struct mutex inst_mutex;

	/** Refcount (reference count of this structure) */
	atomic_t count;

	/** The user id for this mount */
	uid_t user_id;

	/** The group id for this mount */
	gid_t group_id;

	/** The fuse mount flags for this mount (mount options) */
	unsigned flags;

	/** Maximum read size (largest number of bytes per read) */
	unsigned max_read;

	/** Maximum write size (largest number of bytes per write) */
	unsigned max_write;

	/** Readers of the connection are waiting on this (wait queue) */
	wait_queue_head_t waitq;

	/** The list of pending requests (requests still waiting) */
	struct list_head pending;

	/** The list of requests being processed */
	struct list_head processing;

	/** The list of requests under I/O */
	struct list_head io;

	/** The next unique kernel file handle */
	u64 khctr;

	/** rbtree of fuse_files waiting for poll events indexed by ph */
	struct rb_root polled_files;

	/** Maximum number of outstanding background requests */
	unsigned max_background;

	/** Number of background requests at which congestion starts */
	unsigned congestion_threshold;

	/** Number of requests currently in the background */
	unsigned num_background;

	/** Number of background requests currently queued for userspace */
	unsigned active_background;

	/** The list of background requests set aside for later queuing */
	struct list_head bg_queue;

	/** Pending interrupts (queue of interrupt requests) */
	struct list_head interrupts;

	/** Flag indicating if connection is blocked.  This will be
	    the case before the INIT reply is received, and if there
	    are too many outstanding backgrounds requests */
	int blocked;

	/** waitq for blocked connection */
	wait_queue_head_t blocked_waitq;

	/** waitq for reserved requests */
	wait_queue_head_t reserved_req_waitq;

	/** The next unique request id */
	u64 reqctr;

	/** Connection established, cleared on umount, connection
	    abort and device release (connection flag) */
	unsigned connected;

	/** Connection failed (version mismatch).  Cannot race with
	    setting other bitfields since it is only set once in INIT
	    reply, before any other request, and never cleared */
	unsigned conn_error:1;

	/** Connection successful.  Only set in INIT */
	unsigned conn_init:1;

	/** Do readpages asynchronously?  Only set in INIT */
	unsigned async_read:1;

	/** Do not send separate SETATTR request before open(O_TRUNC) */
	unsigned atomic_o_trunc:1;

	/** Filesystem supports NFS exporting.  Only set in INIT */
	unsigned export_support:1;

	/** Set if bdi is valid */
	unsigned bdi_initialized:1;

	/*
	 * The following bitfields are only for optimization purposes
	 * and hence races in setting them will not cause malfunction
	 */

	/** Is fsync not implemented by fs? */
	unsigned no_fsync:1;

	/** Is fsyncdir not implemented by fs? */
	unsigned no_fsyncdir:1;

	/** Is flush not implemented by fs? */
	unsigned no_flush:1;

	/** Is setxattr not implemented by fs? */
	unsigned no_setxattr:1;

	/** Is getxattr not implemented by fs? */
	unsigned no_getxattr:1;

	/** Is listxattr not implemented by fs? */
	unsigned no_listxattr:1;

	/** Is removexattr not implemented by fs? */
	unsigned no_removexattr:1;

	/** Are file locking primitives not implemented by fs? */
	unsigned no_lock:1;

	/** Is access not implemented by fs? */
	unsigned no_access:1;

	/** Is create not implemented by fs? */
	unsigned no_create:1;

	/** Is interrupt not implemented by fs? */
	unsigned no_interrupt:1;

	/** Is bmap not implemented by fs? */
	unsigned no_bmap:1;

	/** Is poll not implemented by fs? */
	unsigned no_poll:1;

	/** Do multi-page cached writes */
	unsigned big_writes:1;

	/** Don't apply umask to creation modes */
	unsigned dont_mask:1;

	/** The number of requests waiting for completion */
	atomic_t num_waiting;

	/** Negotiated minor version */
	unsigned minor;

	/** Backing dev info */
	struct backing_dev_info bdi;

	/** Entry on the fuse_conn_list */
	struct list_head entry;

	/** Device ID from super block */
	dev_t dev;

	/** Dentries in the control filesystem */
	struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];

	/** number of dentries used in the above array */
	int ctl_ndents;

	/** O_ASYNC requests */
	struct fasync_struct *fasync;

	/** Key for lock owner ID scrambling */
	u32 scramble_key[4];

	/** Reserved request for the DESTROY message */
	struct fuse_req *destroy_req;

	/** Version counter for attribute changes (file attribute version) */
	u64 attr_version;

	/** Called on final put */
	void (*release)(struct fuse_conn *);

	/** Super block for this connection. */
	struct super_block *sb;

	/** Read/write semaphore to hold when accessing sb. */
	struct rw_semaphore killsb;
};
fuse_conn結構體的指標將會儲存在file->private_data中,每次核心態向使用者態發送請求時都會用到fuse_conn結構體。fuse_dev_read函數的處理流程主要如下:
/*
 * Read handler of the FUSE device: hands the next queued request to the
 * user-space reader described by iov/nr_segs.
 *
 * NOTE: this is an abridged excerpt. The article leaves out the remaining
 * variable declarations (file, req, err, reqsize, cs, ...) and parts of the
 * error handling; the omissions are marked below.
 *
 * Returns the request size (in->h.len) after copying a request, -EPERM when
 * no fuse_conn is attached to the file, or err via the err_unlock path
 * (-EAGAIN for a non-blocking read with nothing pending). When an interrupt
 * is queued, the result of fuse_read_interrupt() is returned instead.
 */
static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
			     unsigned long nr_segs, loff_t pos)
{
	struct fuse_in *in;	/* input side of the request that user space reads from the kernel */
	/* remaining variable declarations omitted in the article */
	struct fuse_conn *fc = fuse_get_conn(file);	/* get the fuse_conn pointer */
	if (!fc)
		return -EPERM;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	/*
	 * Non-blocking mode: check whether any request is waiting in the
	 * queue; if there is none, return right away.
	 */
	if ((file->f_flags & O_NONBLOCK) && fc->connected &&
	    !request_pending(fc))
		goto err_unlock;

	request_wait(fc);	/* block until a request from the kernel side arrives */
	/* ...... (code omitted in the article) */
	if (!list_empty(&fc->interrupts)) {
		/* An interrupt request is queued: send that one first. */
		req = list_entry(fc->interrupts.next, struct fuse_req,
				 intr_entry);
		return fuse_read_interrupt(fc, req, iov, nr_segs);
	}

	/* Take the next request to be sent from the pending queue. */
	req = list_entry(fc->pending.next, struct fuse_req, list);
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io);	/* move the request onto the under-I/O list */

	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	/* ........ (code omitted in the article) */
	spin_unlock(&fc->lock);
	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);	/* prepare for copying the request to user space */
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));	/* copy the request header to user space */
	if (!err)
		/*
		 * Copy the request body to user space; when the request
		 * carries several arguments they are copied one after another.
		 */
		err = fuse_copy_args(&cs, in->numargs, in->argpages,
				     (struct fuse_arg *) in->args, 0);
	fuse_copy_finish(&cs);	/* finish the copy and release memory */

	spin_lock(&fc->lock);
	req->locked = 0;
	/* error checks on the send path omitted in the article .... */
	if (!req->isreply)
		/* No reply expected: the request ends here. */
		request_end(fc, req);
	else {
		/*
		 * The request needs a result back from user space: move it to
		 * the processing queue, where fuse_dev_write will deal with it.
		 */
		req->state = FUSE_REQ_SENT;
		list_move_tail(&req->list, &fc->processing);
		if (req->interrupted)
			queue_interrupt(fc, req);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}
其中fuse_in結構體如下所示:
/** The request input */
struct fuse_in {
	/** The request header */
	struct fuse_in_header h;

	/** True if the data for the last argument is in req->pages */
	unsigned argpages:1;

	/** Number of arguments contained in this request */
	unsigned numargs;

	/** Array of arguments */
	struct fuse_in_arg args[3];
};
此結構體中包含的另外兩個結構體如下:
struct fuse_in_header {__u32len; //包的長度__u32opcode; //作業碼,用來表示操作類型__u64unique; //此包的唯一編號__u64nodeid; //表示操作檔案節點的id,類似ino__u32uid;__u32gid;__u32pid;__u32padding; //是否處於掛起狀態 ???};
/** One input argument of a request */
struct fuse_in_arg {
	unsigned size;		/* length of the argument */
	const void *value;	/* pointer to the argument data */
};