背景
参考:Google文档 FUSE 透传
参考此文档,目前kernel.org提供的fuse passthrough补丁在6.9版本之后,但想要在5.4和5.15版本内核做移植应该如何简单点呢?文档中提到 Android的内核为5.4 和 5.15版本内核做了fuse passthrough功能,可以参考Android提供的内核版本来修改对应kernel.org发布的版本。(本文提供一个思路)
还可以参考:https://lore.kernel.org/lkml/20210125153057.3623715-1-balsini@android.com/
1 下载Android系统版本的Linux kernel
可以从这里使用repo下载对应版本的内核,可以随便找一个5.4或5.15版内核,https://android.googlesource.com/kernel/manifest,关于如何使用repo下载可在网上搜索,这里不过多介绍。
2 如果不想麻烦,我会提供我移植测试过的patch供分享
2.1 linux kernel 5.4版本patch (fuse-passthrough-on-kernel5.4.290+.patch)
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 3e8cebfb5..6971454a2 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_CUSE) += cuse.oobj-$(CONFIG_VIRTIO_FS) += virtiofs.ofuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+fuse-objs += passthrough.ovirtiofs-y += virtio_fs.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2d02008ec..ba9ec9c21 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2257,37 +2257,50 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)static long fuse_dev_ioctl(struct file *file, unsigned int cmd,unsigned long arg){
- int err = -ENOTTY;
-
- if (cmd == FUSE_DEV_IOC_CLONE) {
- int oldfd;
-
- err = -EFAULT;
- if (!get_user(oldfd, (__u32 __user *) arg)) {
- struct file *old = fget(oldfd);
-
- err = -EINVAL;
- if (old) {
- struct fuse_dev *fud = NULL;
-
- /*
- * Check against file->f_op because CUSE
- * uses the same ioctl handler.
- */
- if (old->f_op == file->f_op &&
- old->f_cred->user_ns == file->f_cred->user_ns)
- fud = fuse_get_dev(old);
-
- if (fud) {
- mutex_lock(&fuse_mutex);
- err = fuse_device_clone(fud->fc, file);
- mutex_unlock(&fuse_mutex);
- }
- fput(old);
- }
- }
- }
- return err;
+ int res;
+ int oldfd;
+ struct fuse_dev *fud = NULL;
+
+ switch (cmd) {
+ case FUSE_DEV_IOC_CLONE:
+ res = -EFAULT;
+ if (!get_user(oldfd, (__u32 __user *)arg)) {
+ struct file *old = fget(oldfd);
+
+ res = -EINVAL;
+ if (old) {
+ /*
+ * Check against file->f_op because CUSE
+ * uses the same ioctl handler.
+ */
+ if (old->f_op == file->f_op &&
+ old->f_cred->user_ns ==
+ file->f_cred->user_ns)
+ fud = fuse_get_dev(old);
+
+ if (fud) {
+ mutex_lock(&fuse_mutex);
+ res = fuse_device_clone(fud->fc, file);
+ mutex_unlock(&fuse_mutex);
+ }
+ fput(old);
+ }
+ }
+ break;
+ case FUSE_DEV_IOC_PASSTHROUGH_OPEN:
+ res = -EFAULT;
+ if (!get_user(oldfd, (__u32 __user *)arg)) {
+ res = -EINVAL;
+ fud = fuse_get_dev(file);
+ if (fud)
+ res = fuse_passthrough_open(fud, oldfd);
+ }
+ break;
+ default:
+ res = -ENOTTY;
+ break;
+ }
+ return res;}const struct file_operations fuse_dev_operations = {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b2f37809f..fd2f0f635 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -492,6 +492,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,ff->fh = outopen.fh;ff->nodeid = outentry.nodeid;ff->open_flags = outopen.open_flags;
+ fuse_passthrough_setup(fc, ff, &outopen);inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,&outentry.attr, entry_attr_timeout(&outentry), 0);if (!inode) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d157eef3e..8979d05dc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -151,7 +151,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,if (!err) {ff->fh = outarg.fh;ff->open_flags = outarg.open_flags;
-
+ fuse_passthrough_setup(fc, ff, &outarg);} else if (err != -ENOSYS) {fuse_file_free(ff);return err;
@@ -287,6 +287,8 @@ void fuse_release_common(struct file *file, bool isdir)struct fuse_release_args *ra = ff->release_args;int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;+ fuse_passthrough_release(&ff->passthrough);
+fuse_prepare_release(fi, ff, file->f_flags, opcode);if (ff->flock) {
@@ -1596,10 +1598,12 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)if (fuse_is_bad(file_inode(file)))return -EIO;- if (!(ff->open_flags & FOPEN_DIRECT_IO))
- return fuse_cache_read_iter(iocb, to);
- else
- return fuse_direct_read_iter(iocb, to);
+ if (ff->passthrough.filp)
+ return fuse_passthrough_read_iter(iocb, to);
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_cache_read_iter(iocb, to);
+ else
+ return fuse_direct_read_iter(iocb, to);}static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -1610,10 +1614,12 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)if (fuse_is_bad(file_inode(file)))return -EIO;- if (!(ff->open_flags & FOPEN_DIRECT_IO))
- return fuse_cache_write_iter(iocb, from);
- else
- return fuse_direct_write_iter(iocb, from);
+ if (ff->passthrough.filp)
+ return fuse_passthrough_write_iter(iocb, from);
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_cache_write_iter(iocb, from);
+ else
+ return fuse_direct_write_iter(iocb, from);}static void fuse_writepage_free(struct fuse_writepage_args *wpa)
@@ -2330,6 +2336,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma){struct fuse_file *ff = file->private_data;+ if (ff->passthrough.filp)
+ return fuse_passthrough_mmap(file, vma);
+if (ff->open_flags & FOPEN_DIRECT_IO) {/* Can't provide the coherency needed for MAP_SHARED */if (vma->vm_flags & VM_MAYSHARE)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e3b3dc686..9d4b0af5f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -165,6 +165,17 @@ enum {struct fuse_conn;struct fuse_release_args;+/**
+ * Reference to lower filesystem file for read/write operations handled in
+ * passthrough mode.
+ * This struct also tracks the credentials to be used for handling read/write
+ * operations.
+ */
+struct fuse_passthrough {
+ struct file *filp;
+ struct cred *cred;
+};
+/** FUSE specific file data */struct fuse_file {/** Fuse connection for this file */
@@ -210,6 +221,9 @@ struct fuse_file {} readdir;+ /** Container for data related to the passthrough functionality */
+ struct fuse_passthrough passthrough;
+/** RB node to be linked on fuse_conn->polled_files */struct rb_node polled_node;@@ -723,6 +737,9 @@ struct fuse_conn {/* Do not show mount options */unsigned int no_mount_options:1;+ /** Passthrough mode for read/write IO */
+ unsigned int passthrough:1;
+/** The number of requests waiting for completion */atomic_t num_waiting;@@ -758,6 +775,12 @@ struct fuse_conn {/** List of device instances belonging to this connection */struct list_head devices;
+
+ /** IDR for passthrough requests */
+ struct idr passthrough_req;
+
+ /** Protects passthrough_req */
+ spinlock_t passthrough_req_lock;};static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -1107,4 +1130,13 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);u64 fuse_get_unique(struct fuse_iqueue *fiq);void fuse_free_conn(struct fuse_conn *fc);+/* passthrough.c */
+int fuse_passthrough_open(struct fuse_dev *fud, u32 lower_fd);
+int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
+ struct fuse_open_out *openarg);
+void fuse_passthrough_release(struct fuse_passthrough *passthrough);
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
+#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 287e850fb..bdaaed31c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -631,6 +631,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,memset(fc, 0, sizeof(*fc));spin_lock_init(&fc->lock);spin_lock_init(&fc->bg_lock);
+ spin_lock_init(&fc->passthrough_req_lock);init_rwsem(&fc->killsb);refcount_set(&fc->count, 1);atomic_set(&fc->dev_count, 1);
@@ -639,6 +640,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,INIT_LIST_HEAD(&fc->bg_queue);INIT_LIST_HEAD(&fc->entry);INIT_LIST_HEAD(&fc->devices);
+ idr_init(&fc->passthrough_req);atomic_set(&fc->num_waiting, 0);fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
@@ -975,6 +977,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,min_t(unsigned int, FUSE_MAX_MAX_PAGES,max_t(unsigned int, arg->max_pages, 1));}
+ if (arg->flags & FUSE_PASSTHROUGH) {
+ fc->passthrough = 1;
+ /* Prevent further stacking */
+ fc->sb->s_stack_depth =
+ FILESYSTEM_MAX_STACK_DEPTH;
+ }} else {ra_pages = fc->max_read / PAGE_SIZE;fc->no_lock = 1;
@@ -1012,7 +1020,8 @@ void fuse_send_init(struct fuse_conn *fc)FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
- FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
+ FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
+ FUSE_PASSTHROUGH;ia->args.opcode = FUSE_INIT;ia->args.in_numargs = 1;ia->args.in_args[0].size = sizeof(ia->in);
@@ -1033,9 +1042,21 @@ void fuse_send_init(struct fuse_conn *fc)}EXPORT_SYMBOL_GPL(fuse_send_init);+static int free_fuse_passthrough(int id, void *p, void *data)
+{
+ struct fuse_passthrough *passthrough = (struct fuse_passthrough *)p;
+
+ fuse_passthrough_release(passthrough);
+ kfree(p);
+
+ return 0;
+}
+void fuse_free_conn(struct fuse_conn *fc){WARN_ON(!list_empty(&fc->devices));
+ idr_for_each(&fc->passthrough_req, free_fuse_passthrough, NULL);
+ idr_destroy(&fc->passthrough_req);kfree_rcu(fc, rcu);}EXPORT_SYMBOL_GPL(fuse_free_conn);
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
new file mode 100644
index 000000000..95368ddfb
--- /dev/null
+++ b/fs/fuse/passthrough.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "fuse_i.h"
+
+#include <linux/fuse.h>
+#include <linux/idr.h>
+#include <linux/uio.h>
+
+#define PASSTHROUGH_IOCB_MASK \
+ (IOCB_APPEND | IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
+
+struct fuse_aio_req {
+ struct kiocb iocb;
+ struct kiocb *iocb_fuse;
+};
+
+static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+ struct file *filp)
+{
+ *kiocb = (struct kiocb){
+ .ki_filp = filp,
+ .ki_flags = kiocb_src->ki_flags,
+ .ki_hint = kiocb_src->ki_hint,
+ .ki_ioprio = kiocb_src->ki_ioprio,
+ .ki_pos = kiocb_src->ki_pos,
+ };
+}
+
+static void fuse_file_accessed(struct file *dst_file, struct file *src_file)
+{
+ struct inode *dst_inode;
+ struct inode *src_inode;
+
+ if (dst_file->f_flags & O_NOATIME)
+ return;
+
+ dst_inode = file_inode(dst_file);
+ src_inode = file_inode(src_file);
+
+ if ((!timespec64_equal(&dst_inode->i_mtime, &src_inode->i_mtime) ||
+ !timespec64_equal(&dst_inode->i_ctime, &src_inode->i_ctime))) {
+ dst_inode->i_mtime = src_inode->i_mtime;
+ dst_inode->i_ctime = src_inode->i_ctime;
+ }
+
+ touch_atime(&dst_file->f_path);
+}
+
+static void fuse_copyattr(struct file *dst_file, struct file *src_file)
+{
+ struct inode *dst = file_inode(dst_file);
+ struct inode *src = file_inode(src_file);
+
+ dst->i_atime = src->i_atime;
+ dst->i_mtime = src->i_mtime;
+ dst->i_ctime = src->i_ctime;
+ i_size_write(dst, i_size_read(src));
+}
+
+static void fuse_aio_cleanup_handler(struct fuse_aio_req *aio_req)
+{
+ struct kiocb *iocb = &aio_req->iocb;
+ struct kiocb *iocb_fuse = aio_req->iocb_fuse;
+
+ if (iocb->ki_flags & IOCB_WRITE) {
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
+ file_end_write(iocb->ki_filp);
+ fuse_copyattr(iocb_fuse->ki_filp, iocb->ki_filp);
+ }
+
+ iocb_fuse->ki_pos = iocb->ki_pos;
+ kfree(aio_req);
+}
+
+static void fuse_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+ struct fuse_aio_req *aio_req =
+ container_of(iocb, struct fuse_aio_req, iocb);
+ struct kiocb *iocb_fuse = aio_req->iocb_fuse;
+
+ fuse_aio_cleanup_handler(aio_req);
+ iocb_fuse->ki_complete(iocb_fuse, res, res2);
+}
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+ const struct cred *old_cred;
+ struct file *fuse_filp = iocb_fuse->ki_filp;
+ struct fuse_file *ff = fuse_filp->private_data;
+ struct file *passthrough_filp = ff->passthrough.filp;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ old_cred = override_creds(ff->passthrough.cred);
+ if (is_sync_kiocb(iocb_fuse)) {
+ ret = vfs_iter_read(passthrough_filp, iter, &iocb_fuse->ki_pos,
+ iocb_to_rw_flags(iocb_fuse->ki_flags,
+ PASSTHROUGH_IOCB_MASK));
+ } else {
+ struct fuse_aio_req *aio_req;
+
+ aio_req = kmalloc(sizeof(struct fuse_aio_req), GFP_KERNEL);
+ if (!aio_req) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ aio_req->iocb_fuse = iocb_fuse;
+ kiocb_clone(&aio_req->iocb, iocb_fuse, passthrough_filp);
+ aio_req->iocb.ki_complete = fuse_aio_rw_complete;
+ ret = call_read_iter(passthrough_filp, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ fuse_aio_cleanup_handler(aio_req);
+ }
+out:
+ revert_creds(old_cred);
+
+ fuse_file_accessed(fuse_filp, passthrough_filp);
+
+ return ret;
+}
+
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+ const struct cred *old_cred;
+ struct file *fuse_filp = iocb_fuse->ki_filp;
+ struct fuse_file *ff = fuse_filp->private_data;
+ struct inode *fuse_inode = file_inode(fuse_filp);
+ struct file *passthrough_filp = ff->passthrough.filp;
+ struct inode *passthrough_inode = file_inode(passthrough_filp);
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ inode_lock(fuse_inode);
+
+ fuse_copyattr(fuse_filp, passthrough_filp);
+
+ old_cred = override_creds(ff->passthrough.cred);
+ if (is_sync_kiocb(iocb_fuse)) {
+ file_start_write(passthrough_filp);
+ ret = vfs_iter_write(passthrough_filp, iter, &iocb_fuse->ki_pos,
+ iocb_to_rw_flags(iocb_fuse->ki_flags,
+ PASSTHROUGH_IOCB_MASK));
+ file_end_write(passthrough_filp);
+ if (ret > 0)
+ fuse_copyattr(fuse_filp, passthrough_filp);
+ } else {
+ struct fuse_aio_req *aio_req;
+
+ aio_req = kmalloc(sizeof(struct fuse_aio_req), GFP_KERNEL);
+ if (!aio_req) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ file_start_write(passthrough_filp);
+ __sb_writers_release(passthrough_inode->i_sb, SB_FREEZE_WRITE);
+
+ aio_req->iocb_fuse = iocb_fuse;
+ kiocb_clone(&aio_req->iocb, iocb_fuse, passthrough_filp);
+ aio_req->iocb.ki_complete = fuse_aio_rw_complete;
+ ret = call_write_iter(passthrough_filp, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ fuse_aio_cleanup_handler(aio_req);
+ }
+out:
+ revert_creds(old_cred);
+ inode_unlock(fuse_inode);
+
+ return ret;
+}
+
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int ret;
+ const struct cred *old_cred;
+ struct fuse_file *ff = file->private_data;
+ struct file *passthrough_filp = ff->passthrough.filp;
+
+ if (!passthrough_filp->f_op->mmap)
+ return -ENODEV;
+
+ if (WARN_ON(file != vma->vm_file))
+ return -EIO;
+
+ vma->vm_file = get_file(passthrough_filp);
+
+ old_cred = override_creds(ff->passthrough.cred);
+ ret = call_mmap(vma->vm_file, vma);
+ revert_creds(old_cred);
+
+ if (ret)
+ fput(passthrough_filp);
+ else
+ fput(file);
+
+ fuse_file_accessed(file, passthrough_filp);
+
+ return ret;
+}
+
+int fuse_passthrough_open(struct fuse_dev *fud, u32 lower_fd)
+{
+ int res;
+ struct file *passthrough_filp;
+ struct fuse_conn *fc = fud->fc;
+ struct inode *passthrough_inode;
+ struct super_block *passthrough_sb;
+ struct fuse_passthrough *passthrough;
+
+ if (!fc->passthrough)
+ return -EPERM;
+
+ passthrough_filp = fget(lower_fd);
+ if (!passthrough_filp) {
+ pr_err("FUSE: invalid file descriptor for passthrough.\n");
+ return -EBADF;
+ }
+
+ if (!passthrough_filp->f_op->read_iter ||
+ !passthrough_filp->f_op->write_iter) {
+ pr_err("FUSE: passthrough file misses file operations.\n");
+ res = -EBADF;
+ goto err_free_file;
+ }
+
+ passthrough_inode = file_inode(passthrough_filp);
+ passthrough_sb = passthrough_inode->i_sb;
+ if (passthrough_sb->s_stack_depth >= FILESYSTEM_MAX_STACK_DEPTH) {
+ pr_err("FUSE: fs stacking depth exceeded for passthrough\n");
+ res = -EINVAL;
+ goto err_free_file;
+ }
+
+ passthrough = kmalloc(sizeof(struct fuse_passthrough), GFP_KERNEL);
+ if (!passthrough) {
+ res = -ENOMEM;
+ goto err_free_file;
+ }
+
+ passthrough->filp = passthrough_filp;
+ passthrough->cred = prepare_creds();
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&fc->passthrough_req_lock);
+ res = idr_alloc(&fc->passthrough_req, passthrough, 1, 0, GFP_ATOMIC);
+ spin_unlock(&fc->passthrough_req_lock);
+ idr_preload_end();
+
+ if (res > 0)
+ return res;
+
+ fuse_passthrough_release(passthrough);
+ kfree(passthrough);
+
+err_free_file:
+ fput(passthrough_filp);
+
+ return res;
+}
+
+int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
+ struct fuse_open_out *openarg)
+{
+ struct fuse_passthrough *passthrough;
+ int passthrough_fh = openarg->passthrough_fh;
+
+ if (!fc->passthrough)
+ return -EPERM;
+
+ /* Default case, passthrough is not requested */
+ if (passthrough_fh <= 0)
+ return -EINVAL;
+
+ spin_lock(&fc->passthrough_req_lock);
+ passthrough = idr_remove(&fc->passthrough_req, passthrough_fh);
+ spin_unlock(&fc->passthrough_req_lock);
+
+ if (!passthrough)
+ return -EINVAL;
+
+ ff->passthrough = *passthrough;
+ kfree(passthrough);
+
+ return 0;
+}
+
+void fuse_passthrough_release(struct fuse_passthrough *passthrough)
+{
+ if (passthrough->filp) {
+ fput(passthrough->filp);
+ passthrough->filp = NULL;
+ }
+ if (passthrough->cred) {
+ put_cred(passthrough->cred);
+ passthrough->cred = NULL;
+ }
+}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index baed54914..f78fc3a69 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -15,6 +15,8 @@#include <linux/fs.h>#include "overlayfs.h"+#define OVL_IOCB_MASK (IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
+static char ovl_whatisit(struct inode *inode, struct inode *realinode){if (realinode != ovl_inode_upper(inode))
@@ -223,23 +225,6 @@ static void ovl_file_accessed(struct file *file)touch_atime(&file->f_path);}-static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
-{
- int ifl = iocb->ki_flags;
- rwf_t flags = 0;
-
- if (ifl & IOCB_NOWAIT)
- flags |= RWF_NOWAIT;
- if (ifl & IOCB_HIPRI)
- flags |= RWF_HIPRI;
- if (ifl & IOCB_DSYNC)
- flags |= RWF_DSYNC;
- if (ifl & IOCB_SYNC)
- flags |= RWF_SYNC;
-
- return flags;
-}
-static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter){struct file *file = iocb->ki_filp;
@@ -256,7 +241,7 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)old_cred = ovl_override_creds(file_inode(file)->i_sb);ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ iocb_to_rw_flags(iocb->ki_flags, OVL_IOCB_MASK));revert_creds(old_cred);ovl_file_accessed(file);
@@ -291,7 +276,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)old_cred = ovl_override_creds(file_inode(file)->i_sb);file_start_write(real.file);ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ iocb_to_rw_flags(iocb->ki_flags, OVL_IOCB_MASK)); file_end_write(real.file);revert_creds(old_cred);diff --git a/include/linux/fs.h b/include/linux/fs.h
index b62715452..9303b4e98 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3456,6 +3456,11 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)return 0;}+static inline rwf_t iocb_to_rw_flags(int ifl, int iocb_mask)
+{
+ return ifl & iocb_mask;
+}
+static inline ino_t parent_ino(struct dentry *dentry){ino_t res;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 373cada89..53c224746 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -342,6 +342,7 @@ struct fuse_file_lock {#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)#define FUSE_MAP_ALIGNMENT (1 << 26)
+#define FUSE_PASSTHROUGH (1 << 31)/*** CUSE INIT request/reply flags
@@ -591,7 +592,7 @@ struct fuse_create_in {struct fuse_open_out {uint64_t fh;uint32_t open_flags;
- uint32_t padding;
+ uint32_t passthrough_fh;};struct fuse_release_in {
@@ -869,7 +870,11 @@ struct fuse_notify_retrieve_in {};/* Device ioctls: */
-#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
+#define FUSE_DEV_IOC_MAGIC 229
+#define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
+/* 127 is reserved for the V1 interface implementation in Android (deprecated) */
+/* 126 is reserved for the V2 interface implementation in Android */
+#define FUSE_DEV_IOC_PASSTHROUGH_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 126, __u32)struct fuse_lseek_in {uint64_t fh;
2.2 linux kernel 5.15版本patch (fuse-passthrough-on-kernel5.15.98.patch)
diff --git a/android_debug_symbols.h b/android_debug_symbols.h
new file mode 100644
index 000000000..3750d361c
--- /dev/null
+++ b/android_debug_symbols.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _ANDROID_DEBUG_SYMBOLS_H
+#define _ANDROID_DEBUG_SYMBOLS_H
+
+enum android_debug_symbol {
+ ADS_SDATA = 0,
+ ADS_BSS_END,
+ ADS_PER_CPU_START,
+ ADS_PER_CPU_END,
+ ADS_START_RO_AFTER_INIT,
+ ADS_END_RO_AFTER_INIT,
+ ADS_LINUX_BANNER,
+#ifdef CONFIG_CMA
+ ADS_TOTAL_CMA,
+#endif
+ ADS_SLAB_CACHES,
+ ADS_SLAB_MUTEX,
+ ADS_MIN_LOW_PFN,
+ ADS_MAX_PFN,
+ ADS_VMALLOC_NR_PAGES,
+ ADS_PCPU_NR_PAGES,
+#ifdef CONFIG_PAGE_OWNER
+ ADS_PAGE_OWNER_ENABLED,
+#endif
+#ifdef CONFIG_SLUB_DEBUG
+ ADS_SLUB_DEBUG,
+#endif
+#ifdef CONFIG_SWAP
+ ADS_NR_SWAP_PAGES,
+#endif
+#ifdef CONFIG_MMU
+ ADS_MMAP_MIN_ADDR,
+#endif
+ ADS_STACK_GUARD_GAP,
+#ifdef CONFIG_SYSCTL
+ ADS_SYSCTL_LEGACY_VA_LAYOUT,
+#endif
+ ADS_SHOW_MEM,
+ ADS_END
+};
+
+enum android_debug_per_cpu_symbol {
+ ADS_IRQ_STACK_PTR = 0,
+ ADS_DEBUG_PER_CPU_END
+};
+
+#ifdef CONFIG_ANDROID_DEBUG_SYMBOLS
+
+void *android_debug_symbol(enum android_debug_symbol symbol);
+void *android_debug_per_cpu_symbol(enum android_debug_per_cpu_symbol symbol);
+
+void android_debug_for_each_module(int (*fn)(const char *mod_name, void *mod_addr, void *data),
+ void *data);
+
+#else /* !CONFIG_ANDROID_DEBUG_SYMBOLS */
+
+static inline void *android_debug_symbol(enum android_debug_symbol symbol)
+{
+ return NULL;
+}
+static inline void *android_debug_per_cpu_symbol(enum android_debug_per_cpu_symbol symbol)
+{
+ return NULL;
+}
+
+static inline void android_debug_for_each_module(int (*fn)(const char *mod_name, void *mod_addr,
+ void *data), void *data) {}
+#endif /* CONFIG_ANDROID_DEBUG_SYMBOLS */
+
+#endif /* _ANDROID_DEBUG_SYMBOLS_H */
diff --git a/android_vendor.h b/android_vendor.h
new file mode 100644
index 000000000..af3014ccc
--- /dev/null
+++ b/android_vendor.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * android_vendor.h - Android vendor data
+ *
+ * Copyright 2020 Google LLC
+ *
+ * These macros are to be used to reserve space in kernel data structures
+ * for use by vendor modules.
+ *
+ * These macros should be used before the kernel abi is "frozen".
+ * Fields can be added to various kernel structures that need space
+ * for functionality implemented in vendor modules. The use of
+ * these fields is vendor specific.
+ */
+#ifndef _ANDROID_VENDOR_H
+#define _ANDROID_VENDOR_H
+
+/*
+ * ANDROID_VENDOR_DATA
+ * Reserve some "padding" in a structure for potential future use.
+ * This normally placed at the end of a structure.
+ * number: the "number" of the padding variable in the structure. Start with
+ * 1 and go up.
+ *
+ * ANDROID_VENDOR_DATA_ARRAY
+ * Same as ANDROID_VENDOR_DATA but allocates an array of u64 with
+ * the specified size
+ */
+#ifdef CONFIG_ANDROID_VENDOR_OEM_DATA
+#define ANDROID_VENDOR_DATA(n) u64 android_vendor_data##n
+#define ANDROID_VENDOR_DATA_ARRAY(n, s) u64 android_vendor_data##n[s]
+
+#define ANDROID_OEM_DATA(n) u64 android_oem_data##n
+#define ANDROID_OEM_DATA_ARRAY(n, s) u64 android_oem_data##n[s]
+
+#define android_init_vendor_data(p, n) \
+ memset(&p->android_vendor_data##n, 0, sizeof(p->android_vendor_data##n))
+#define android_init_oem_data(p, n) \
+ memset(&p->android_oem_data##n, 0, sizeof(p->android_oem_data##n))
+#else
+#define ANDROID_VENDOR_DATA(n)
+#define ANDROID_VENDOR_DATA_ARRAY(n, s)
+#define ANDROID_OEM_DATA(n)
+#define ANDROID_OEM_DATA_ARRAY(n, s)
+
+#define android_init_vendor_data(p, n)
+#define android_init_oem_data(p, n)
+#endif
+
+#endif /* _ANDROID_VENDOR_H */
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 0c48b35c0..d9e1b4738 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CUSE) += cuse.oobj-$(CONFIG_VIRTIO_FS) += virtiofs.ofuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
+fuse-y += passthrough.ofuse-$(CONFIG_FUSE_DAX) += dax.ovirtiofs-y := virtio_fs.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d6b5339c5..03ec85982 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2288,6 +2288,15 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,}}break;
+ case FUSE_DEV_IOC_PASSTHROUGH_OPEN:
+ res = -EFAULT;
+ if (!get_user(oldfd, (__u32 __user *)arg)) {
+ res = -EINVAL;
+ fud = fuse_get_dev(file);
+ if (fud)
+ res = fuse_passthrough_open(fud, oldfd);
+ }
+ break;default:res = -ENOTTY;break;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 80a2181b4..51833238c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -468,6 +468,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,{int err;struct inode *inode;
+ struct fuse_conn *fc = get_fuse_conn(dir);struct fuse_mount *fm = get_fuse_mount(dir);FUSE_ARGS(args);struct fuse_forget_link *forget;
@@ -529,6 +530,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,ff->fh = outopen.fh;ff->nodeid = outentry.nodeid;ff->open_flags = outopen.open_flags;
+ fuse_passthrough_setup(fc, ff, &outopen);inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,&outentry.attr, entry_attr_timeout(&outentry), 0);if (!inode) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index cc95a1c37..43650e0fe 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -146,7 +146,7 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,if (!err) {ff->fh = outarg.fh;ff->open_flags = outarg.open_flags;
-
+ fuse_passthrough_setup(fc, ff, &outarg);} else if (err != -ENOSYS) {fuse_file_free(ff);return ERR_PTR(err);
@@ -305,6 +305,8 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,struct fuse_release_args *ra = ff->release_args;int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;+ fuse_passthrough_release(&ff->passthrough);
+fuse_prepare_release(fi, ff, open_flags, opcode);if (ff->flock) {
@@ -1584,10 +1586,12 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)if (FUSE_IS_DAX(inode))return fuse_dax_read_iter(iocb, to);- if (!(ff->open_flags & FOPEN_DIRECT_IO))
- return fuse_cache_read_iter(iocb, to);
- else
- return fuse_direct_read_iter(iocb, to);
+ if (ff->passthrough.filp)
+ return fuse_passthrough_read_iter(iocb, to);
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_cache_read_iter(iocb, to);
+ else
+ return fuse_direct_read_iter(iocb, to);}static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -1602,10 +1606,12 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)if (FUSE_IS_DAX(inode))return fuse_dax_write_iter(iocb, from);- if (!(ff->open_flags & FOPEN_DIRECT_IO))
- return fuse_cache_write_iter(iocb, from);
- else
- return fuse_direct_write_iter(iocb, from);
+ if (ff->passthrough.filp)
+ return fuse_passthrough_write_iter(iocb, from);
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_cache_write_iter(iocb, from);
+ else
+ return fuse_direct_write_iter(iocb, from);}static void fuse_writepage_free(struct fuse_writepage_args *wpa)
@@ -2406,6 +2412,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)if (FUSE_IS_DAX(file_inode(file)))return fuse_dax_mmap(file, vma);+ if (ff->passthrough.filp)
+ return fuse_passthrough_mmap(file, vma);
+if (ff->open_flags & FOPEN_DIRECT_IO) {/* Can't provide the coherency needed for MAP_SHARED */if (vma->vm_flags & VM_MAYSHARE)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index c3a87586a..804a42e22 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -173,6 +173,17 @@ struct fuse_conn;struct fuse_mount;struct fuse_release_args;+/**
+ * Reference to lower filesystem file for read/write operations handled in
+ * passthrough mode.
+ * This struct also tracks the credentials to be used for handling read/write
+ * operations.
+ */
+struct fuse_passthrough {
+ struct file *filp;
+ struct cred *cred;
+};
+/** FUSE specific file data */struct fuse_file {/** Fuse connection for this file */
@@ -218,6 +229,9 @@ struct fuse_file {} readdir;+ /** Container for data related to the passthrough functionality */
+ struct fuse_passthrough passthrough;
+/** RB node to be linked on fuse_conn->polled_files */struct rb_node polled_node;@@ -763,6 +777,9 @@ struct fuse_conn {/* Auto-mount submounts announced by the server */unsigned int auto_submounts:1;+ /** Passthrough mode for read/write IO */
+ unsigned int passthrough:1;
+/* Propagate syncfs() to server */unsigned int sync_fs:1;@@ -812,6 +829,12 @@ struct fuse_conn {/* New writepages go into this bucket */struct fuse_sync_bucket __rcu *curr_bucket;
+
+ /** IDR for passthrough requests */
+ struct idr passthrough_req;
+
+ /** Protects passthrough_req */
+ spinlock_t passthrough_req_lock;};/*
@@ -1283,4 +1306,13 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,void fuse_file_release(struct inode *inode, struct fuse_file *ff,unsigned int open_flags, fl_owner_t id, bool isdir);+/* passthrough.c */
+void fuse_copyattr(struct file *dst_file, struct file *src_file);
+int fuse_passthrough_open(struct fuse_dev *fud, u32 lower_fd);
+int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
+ struct fuse_open_out *openarg);
+void fuse_passthrough_release(struct fuse_passthrough *passthrough);
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 50365143f..f54d482f5 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -783,6 +783,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,memset(fc, 0, sizeof(*fc));spin_lock_init(&fc->lock);spin_lock_init(&fc->bg_lock);
+ spin_lock_init(&fc->passthrough_req_lock);init_rwsem(&fc->killsb);refcount_set(&fc->count, 1);atomic_set(&fc->dev_count, 1);
@@ -791,6 +792,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,INIT_LIST_HEAD(&fc->bg_queue);INIT_LIST_HEAD(&fc->entry);INIT_LIST_HEAD(&fc->devices);
+ idr_init(&fc->passthrough_req);atomic_set(&fc->num_waiting, 0);fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
@@ -1150,6 +1152,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,fc->handle_killpriv_v2 = 1;fm->sb->s_flags |= SB_NOSEC;}
+ if (arg->flags & FUSE_PASSTHROUGH) {
+ fc->passthrough = 1;
+ /* Prevent further stacking */
+ fm->sb->s_stack_depth =
+ FILESYSTEM_MAX_STACK_DEPTH;
+ }if (arg->flags & FUSE_SETXATTR_EXT)fc->setxattr_ext = 1;} else {
@@ -1195,6 +1203,7 @@ void fuse_send_init(struct fuse_mount *fm)FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
+ FUSE_PASSTHROUGH |FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT;#ifdef CONFIG_FUSE_DAXif (fm->fc->dax)
@@ -1223,9 +1232,21 @@ void fuse_send_init(struct fuse_mount *fm)}EXPORT_SYMBOL_GPL(fuse_send_init);+ static int free_fuse_passthrough(int id, void *p, void *data)
+ {
+ struct fuse_passthrough *passthrough = (struct fuse_passthrough *)p;
+
+ fuse_passthrough_release(passthrough);
+ kfree(p);
+
+ return 0;
+ }
+void fuse_free_conn(struct fuse_conn *fc){WARN_ON(!list_empty(&fc->devices));
+ idr_for_each(&fc->passthrough_req, free_fuse_passthrough, NULL);
+ idr_destroy(&fc->passthrough_req);kfree_rcu(fc, rcu);}EXPORT_SYMBOL_GPL(fuse_free_conn);
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
new file mode 100644
index 000000000..1845e05db
--- /dev/null
+++ b/fs/fuse/passthrough.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "fuse_i.h"
+
+#include <linux/fuse.h>
+#include <linux/idr.h>
+#include <linux/uio.h>
+
+#define PASSTHROUGH_IOCB_MASK \
+ (IOCB_APPEND | IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
+
+struct fuse_aio_req {
+ struct kiocb iocb;
+ struct kiocb *iocb_fuse;
+};
+
+static void fuse_file_accessed(struct file *dst_file, struct file *src_file)
+{
+ struct inode *dst_inode;
+ struct inode *src_inode;
+
+ if (dst_file->f_flags & O_NOATIME)
+ return;
+
+ dst_inode = file_inode(dst_file);
+ src_inode = file_inode(src_file);
+
+ if ((!timespec64_equal(&dst_inode->i_mtime, &src_inode->i_mtime) ||
+ !timespec64_equal(&dst_inode->i_ctime, &src_inode->i_ctime))) {
+ dst_inode->i_mtime = src_inode->i_mtime;
+ dst_inode->i_ctime = src_inode->i_ctime;
+ }
+
+ touch_atime(&dst_file->f_path);
+}
+
+void fuse_copyattr(struct file *dst_file, struct file *src_file)
+{
+ struct inode *dst = file_inode(dst_file);
+ struct inode *src = file_inode(src_file);
+
+ dst->i_atime = src->i_atime;
+ dst->i_mtime = src->i_mtime;
+ dst->i_ctime = src->i_ctime;
+ i_size_write(dst, i_size_read(src));
+}
+
+static void fuse_aio_cleanup_handler(struct fuse_aio_req *aio_req)
+{
+ struct kiocb *iocb = &aio_req->iocb;
+ struct kiocb *iocb_fuse = aio_req->iocb_fuse;
+
+ if (iocb->ki_flags & IOCB_WRITE) {
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
+ file_end_write(iocb->ki_filp);
+ fuse_copyattr(iocb_fuse->ki_filp, iocb->ki_filp);
+ }
+
+ iocb_fuse->ki_pos = iocb->ki_pos;
+ kfree(aio_req);
+}
+
+static void fuse_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+ struct fuse_aio_req *aio_req =
+ container_of(iocb, struct fuse_aio_req, iocb);
+ struct kiocb *iocb_fuse = aio_req->iocb_fuse;
+
+ fuse_aio_cleanup_handler(aio_req);
+ iocb_fuse->ki_complete(iocb_fuse, res, res2);
+}
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+ const struct cred *old_cred;
+ struct file *fuse_filp = iocb_fuse->ki_filp;
+ struct fuse_file *ff = fuse_filp->private_data;
+ struct file *passthrough_filp = ff->passthrough.filp;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ old_cred = override_creds(ff->passthrough.cred);
+ if (is_sync_kiocb(iocb_fuse)) {
+ ret = vfs_iter_read(passthrough_filp, iter, &iocb_fuse->ki_pos,
+ iocb_to_rw_flags(iocb_fuse->ki_flags,
+ PASSTHROUGH_IOCB_MASK));
+ } else {
+ struct fuse_aio_req *aio_req;
+
+ aio_req = kmalloc(sizeof(struct fuse_aio_req), GFP_KERNEL);
+ if (!aio_req) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ aio_req->iocb_fuse = iocb_fuse;
+ kiocb_clone(&aio_req->iocb, iocb_fuse, passthrough_filp);
+ aio_req->iocb.ki_complete = fuse_aio_rw_complete;
+ ret = call_read_iter(passthrough_filp, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ fuse_aio_cleanup_handler(aio_req);
+ }
+out:
+ revert_creds(old_cred);
+
+ fuse_file_accessed(fuse_filp, passthrough_filp);
+
+ return ret;
+}
+
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+ const struct cred *old_cred;
+ struct file *fuse_filp = iocb_fuse->ki_filp;
+ struct fuse_file *ff = fuse_filp->private_data;
+ struct inode *fuse_inode = file_inode(fuse_filp);
+ struct file *passthrough_filp = ff->passthrough.filp;
+ struct inode *passthrough_inode = file_inode(passthrough_filp);
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ inode_lock(fuse_inode);
+
+ fuse_copyattr(fuse_filp, passthrough_filp);
+
+ old_cred = override_creds(ff->passthrough.cred);
+ if (is_sync_kiocb(iocb_fuse)) {
+ file_start_write(passthrough_filp);
+ ret = vfs_iter_write(passthrough_filp, iter, &iocb_fuse->ki_pos,
+ iocb_to_rw_flags(iocb_fuse->ki_flags,
+ PASSTHROUGH_IOCB_MASK));
+ file_end_write(passthrough_filp);
+ if (ret > 0)
+ fuse_copyattr(fuse_filp, passthrough_filp);
+ } else {
+ struct fuse_aio_req *aio_req;
+
+ aio_req = kmalloc(sizeof(struct fuse_aio_req), GFP_KERNEL);
+ if (!aio_req) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ file_start_write(passthrough_filp);
+ __sb_writers_release(passthrough_inode->i_sb, SB_FREEZE_WRITE);
+
+ aio_req->iocb_fuse = iocb_fuse;
+ kiocb_clone(&aio_req->iocb, iocb_fuse, passthrough_filp);
+ aio_req->iocb.ki_complete = fuse_aio_rw_complete;
+ ret = call_write_iter(passthrough_filp, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ fuse_aio_cleanup_handler(aio_req);
+ }
+out:
+ revert_creds(old_cred);
+ inode_unlock(fuse_inode);
+
+ return ret;
+}
+
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int ret;
+ const struct cred *old_cred;
+ struct fuse_file *ff = file->private_data;
+ struct file *passthrough_filp = ff->passthrough.filp;
+
+ if (!passthrough_filp->f_op->mmap)
+ return -ENODEV;
+
+ if (WARN_ON(file != vma->vm_file))
+ return -EIO;
+
+ vma->vm_file = get_file(passthrough_filp);
+
+ old_cred = override_creds(ff->passthrough.cred);
+ ret = call_mmap(vma->vm_file, vma);
+ revert_creds(old_cred);
+
+ if (ret)
+ fput(passthrough_filp);
+ else
+ fput(file);
+
+ fuse_file_accessed(file, passthrough_filp);
+
+ return ret;
+}
+
+int fuse_passthrough_open(struct fuse_dev *fud, u32 lower_fd)
+{
+ int res;
+ struct file *passthrough_filp;
+ struct fuse_conn *fc = fud->fc;
+ struct inode *passthrough_inode;
+ struct super_block *passthrough_sb;
+ struct fuse_passthrough *passthrough;
+
+ if (!fc->passthrough)
+ return -EPERM;
+
+ passthrough_filp = fget(lower_fd);
+ if (!passthrough_filp) {
+ pr_err("FUSE: invalid file descriptor for passthrough.\n");
+ return -EBADF;
+ }
+
+ if (!passthrough_filp->f_op->read_iter ||
+ !((passthrough_filp->f_path.mnt->mnt_flags | MNT_READONLY) ||
+ passthrough_filp->f_op->write_iter)) {
+ pr_err("FUSE: passthrough file misses file operations.\n");
+ res = -EBADF;
+ goto err_free_file;
+ }
+
+ passthrough_inode = file_inode(passthrough_filp);
+ passthrough_sb = passthrough_inode->i_sb;
+ if (passthrough_sb->s_stack_depth >= FILESYSTEM_MAX_STACK_DEPTH) {
+ pr_err("FUSE: fs stacking depth exceeded for passthrough\n");
+ res = -EINVAL;
+ goto err_free_file;
+ }
+
+ passthrough = kmalloc(sizeof(struct fuse_passthrough), GFP_KERNEL);
+ if (!passthrough) {
+ res = -ENOMEM;
+ goto err_free_file;
+ }
+
+ passthrough->filp = passthrough_filp;
+ passthrough->cred = prepare_creds();
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&fc->passthrough_req_lock);
+ res = idr_alloc(&fc->passthrough_req, passthrough, 1, 0, GFP_ATOMIC);
+ spin_unlock(&fc->passthrough_req_lock);
+ idr_preload_end();
+
+ if (res > 0)
+ return res;
+
+ fuse_passthrough_release(passthrough);
+ kfree(passthrough);
+
+err_free_file:
+ fput(passthrough_filp);
+
+ return res;
+}
+
+int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff,
+ struct fuse_open_out *openarg)
+{
+ struct fuse_passthrough *passthrough;
+ int passthrough_fh = openarg->passthrough_fh;
+
+ if (!fc->passthrough)
+ return -EPERM;
+
+ /* Default case, passthrough is not requested */
+ if (passthrough_fh <= 0)
+ return -EINVAL;
+
+ spin_lock(&fc->passthrough_req_lock);
+ passthrough = idr_remove(&fc->passthrough_req, passthrough_fh);
+ spin_unlock(&fc->passthrough_req_lock);
+
+ if (!passthrough)
+ return -EINVAL;
+
+ ff->passthrough = *passthrough;
+ kfree(passthrough);
+
+ return 0;
+}
+
+void fuse_passthrough_release(struct fuse_passthrough *passthrough)
+{
+ if (passthrough->filp) {
+ fput(passthrough->filp);
+ passthrough->filp = NULL;
+ }
+ if (passthrough->cred) {
+ put_cred(passthrough->cred);
+ passthrough->cred = NULL;
+ }
+}
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 28cb05ef0..83f9e9734 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -15,6 +15,8 @@#include <linux/fs.h>#include "overlayfs.h"+#define OVL_IOCB_MASK (IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC)
+struct ovl_aio_req {struct kiocb iocb;refcount_t ref;
@@ -237,22 +239,6 @@ static void ovl_file_accessed(struct file *file)touch_atime(&file->f_path);}-static rwf_t ovl_iocb_to_rwf(int ifl)
-{
- rwf_t flags = 0;
-
- if (ifl & IOCB_NOWAIT)
- flags |= RWF_NOWAIT;
- if (ifl & IOCB_HIPRI)
- flags |= RWF_HIPRI;
- if (ifl & IOCB_DSYNC)
- flags |= RWF_DSYNC;
- if (ifl & IOCB_SYNC)
- flags |= RWF_SYNC;
-
- return flags;
-}
-static inline void ovl_aio_put(struct ovl_aio_req *aio_req){if (refcount_dec_and_test(&aio_req->ref)) {
@@ -313,7 +299,8 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)old_cred = ovl_override_creds(file_inode(file)->i_sb);if (is_sync_kiocb(iocb)) {ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb->ki_flags));
+ iocb_to_rw_flags(iocb->ki_flags,
+ OVL_IOCB_MASK));} else {struct ovl_aio_req *aio_req;@@ -378,7 +365,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)if (is_sync_kiocb(iocb)) {file_start_write(real.file);ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(ifl));
+ iocb_to_rw_flags(ifl, OVL_IOCB_MASK));file_end_write(real.file);/* Update size */ovl_copyattr(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1e1ac116d..f9a42be61 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3462,6 +3462,11 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)return 0;}+static inline rwf_t iocb_to_rw_flags(int ifl, int iocb_mask)
+{
+ return ifl & iocb_mask;
+}
+static inline ino_t parent_ino(struct dentry *dentry){ino_t res;
diff --git a/include/uapi/linux/android_fuse.h b/include/uapi/linux/android_fuse.h
new file mode 100644
index 000000000..58f3d1719
--- /dev/null
+++ b/include/uapi/linux/android_fuse.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause WITH Linux-syscall-note */
+/* Copyright (c) 2022 Google LLC */
+
+#ifndef _LINUX_ANDROID_FUSE_H
+#define _LINUX_ANDROID_FUSE_H
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define FUSE_ACTION_KEEP 0
+#define FUSE_ACTION_REMOVE 1
+#define FUSE_ACTION_REPLACE 2
+
+struct fuse_entry_bpf_out {
+ uint64_t backing_action;
+ uint64_t backing_fd;
+ uint64_t bpf_action;
+ uint64_t bpf_fd;
+};
+
+struct fuse_entry_bpf {
+ struct fuse_entry_bpf_out out;
+ struct file *backing_file;
+ struct file *bpf_file;
+};
+
+struct fuse_read_out {
+ uint64_t offset;
+ uint32_t again;
+ uint32_t padding;
+};
+
+struct fuse_in_postfilter_header {
+ uint32_t len;
+ uint32_t opcode;
+ uint64_t unique;
+ uint64_t nodeid;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t pid;
+ uint32_t error_in;
+};
+
+/*
+ * Fuse BPF Args
+ *
+ * Used to communicate with bpf programs to allow checking or altering certain values.
+ * The end_offset allows the bpf verifier to check boundaries statically. This reflects
+ * the ends of the buffer. size shows the length that was actually used.
+ *
+ */
+
+/** One input argument of a request */
+struct fuse_bpf_in_arg {
+ uint32_t size;
+ uint32_t padding;
+ union {
+ const void *value;
+ uint64_t padding2;
+ };
+ union {
+ const void *end_offset;
+ uint64_t padding3;
+ };
+};
+
+/** One output argument of a request */
+struct fuse_bpf_arg {
+ uint32_t size;
+ uint32_t padding;
+ union {
+ void *value;
+ uint64_t padding2;
+ };
+ union {
+ void *end_offset;
+ uint64_t padding3;
+ };
+};
+
+#define FUSE_MAX_IN_ARGS 5
+#define FUSE_MAX_OUT_ARGS 3
+
+#define FUSE_BPF_FORCE (1 << 0)
+#define FUSE_BPF_OUT_ARGVAR (1 << 6)
+
+struct fuse_bpf_args {
+ uint64_t nodeid;
+ uint32_t opcode;
+ uint32_t error_in;
+ uint32_t in_numargs;
+ uint32_t out_numargs;
+ uint32_t flags;
+ uint32_t padding;
+ struct fuse_bpf_in_arg in_args[FUSE_MAX_IN_ARGS];
+ struct fuse_bpf_arg out_args[FUSE_MAX_OUT_ARGS];
+};
+
+#define FUSE_BPF_USER_FILTER 1
+#define FUSE_BPF_BACKING 2
+#define FUSE_BPF_POST_FILTER 4
+
+#define FUSE_OPCODE_FILTER 0x0ffff
+#define FUSE_PREFILTER 0x10000
+#define FUSE_POSTFILTER 0x20000
+
+struct bpf_prog *fuse_get_bpf_prog(struct file *file);
+
+#endif /* _LINUX_ANDROID_FUSE_H */
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 36ed09222..ea30c9ca8 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -367,6 +367,7 @@ struct fuse_file_lock {#define FUSE_SUBMOUNTS (1 << 27)#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)#define FUSE_SETXATTR_EXT (1 << 29)
+#define FUSE_PASSTHROUGH (1 << 31)/*** CUSE INIT request/reply flags
@@ -639,7 +640,7 @@ struct fuse_create_in {struct fuse_open_out {uint64_t fh;uint32_t open_flags;
- uint32_t padding;
+ uint32_t passthrough_fh;};struct fuse_release_in {
@@ -923,6 +924,7 @@ struct fuse_notify_retrieve_in {/* Device ioctls: */#define FUSE_DEV_IOC_MAGIC 229#define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
+#define FUSE_DEV_IOC_PASSTHROUGH_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 126, __u32)struct fuse_lseek_in {uint64_t fh;
3 Linux上测试 FUSE passthrough
3.1 安装必要软件包
首先确保安装了 FUSE 开发包和工具:
# Ubuntu/Debian
sudo apt-get install fuse3 libfuse3-dev pkg-config
3.2 安装 passthrough 示例
FUSE 项目提供了一个 passthrough 示例实现。你可以从源码编译安装:
git clone https://github.com/libfuse/libfuse
cd libfuse/example
mkdir build
cd build
meson ../..
ninja
3.3 运行 passthrough 示例
# 创建一个挂载点
mkdir ~/passthrough-fs# 运行 passthrough 示例
sudo ./passthrough ~/passthrough-fs -o allow_other
这会将你的根文件系统通过 FUSE 挂载到 ~/passthrough-fs
目录。
3.4 测试基本功能
# 列出文件
ls -l ~/passthrough-fs# 创建测试文件
echo "FUSE passthrough test" > ~/passthrough-fs/testfile.txt# 读取文件
cat ~/passthrough-fs/testfile.txt# 检查文件是否也出现在原始位置
cat /testfile.txt
3.5 性能测试
你可以使用简单的工具测试性能:
# 写入测试
dd if=/dev/zero of=~/passthrough-fs/testfile bs=1M count=100# 读取测试
dd if=~/passthrough-fs/testfile of=/dev/null bs=1M
3.6 高级选项
passthrough 支持多种选项:
# 以调试模式运行
./passthrough ~/passthrough-fs -d# 指定源目录(而不是整个文件系统)
./passthrough ~/passthrough-fs -o source=/path/to/source/dir# 启用属性缓存(提高性能)
./passthrough ~/passthrough-fs -o attr_timeout=10
3.7 卸载文件系统
fusermount3 -u ~/passthrough-fs
3.8 注意事项
- 使用
-o allow_other
选项可能需要修改/etc/fuse.conf
文件 - 性能会比原生文件系统稍低
- 某些特殊文件操作可能不完全支持
通过以上步骤,你可以测试 FUSE passthrough 的基本功能和性能表现。