diff options
| | | |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-18 18:50:52 +0300 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-18 18:50:52 +0300 |
| commit | 6edc20078ad0b05ab2dc2693965d373628d65f80 (patch) | |
| tree | bfd06b37895a6b39298bbadc55e2fff8e586eedd | |
| parent | 9e7e6633458362db72427b48effad8d759131c35 (diff) | |
| parent | 7d87a5a284bb34edb3f4e7e312ef403b3385a7b7 (diff) | |
| download | linux-6edc20078ad0b05ab2dc2693965d373628d65f80.tar.xz | |
Merge tag 'fuse-update-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi:
- Fix lots of bugs, most from the late 6.x era, but some going back
  to 2.6.x
- Add subsystems (io-uring, passthrough) and respective maintainers
  (Bernd, Joanne and Amir)
- Separate transport and fs layers (Miklos)
- Don't block on cat /dev/fuse (Joanne)
- Perform some refactoring in fuse-uring (Joanne)
- Don't use bounce-buffer for READDIR reply in virtio-fs (Matthew Ochs)
- Clean up documentation (Randy)
- Improve tracing (Amir)
- Extend page cache invalidation after DIO (Cheng Ding)
- Invalidate readdir cache on epoch change (Jun Wu)
- Misc cleanups
* tag 'fuse-update-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (81 commits)
fuse-uring: clear ent->fuse_req in commit_fetch error path
fuse-uring: use named constants for io-uring iovec indices
fuse-uring: refactor setting up copy state for payload copying
fuse-uring: use enum types for header copying
fuse-uring: refactor io-uring header copying from ring
fuse-uring: refactor io-uring header copying to ring
fuse-uring: separate next request fetching from sending logic
fuse: invalidate readdir cache on epoch bump
virtio-fs: avoid double-free on failed queue setup
fuse: invalidate page cache after DIO and async DIO writes
fuse: set ff->flock only on success
fuse: clean up interrupt reading
fuse: remove stray newline in fuse_dev_do_read()
fuse: use READ_ONCE in fuse_chan_num_background()
fuse: dax: Move long delayed work on system_dfl_long_wq
fuse: add fuse_request_sent tracepoint
fuse: Add SPDX ID lines to some files
fuse: use QSTR() instead of QSTR_INIT() in fuse_get_dentry
fuse: convert page array allocation to kcalloc()
fuse: use current creds for backing files
...
| -rw-r--r-- | MAINTAINERS | 26 | ||||
| -rw-r--r-- | fs/fuse/Makefile | 3 | ||||
| -rw-r--r-- | fs/fuse/acl.c | 4 | ||||
| -rw-r--r-- | fs/fuse/args.h | 65 | ||||
| -rw-r--r-- | fs/fuse/backing.c | 3 | ||||
| -rw-r--r-- | fs/fuse/control.c | 20 | ||||
| -rw-r--r-- | fs/fuse/cuse.c | 32 | ||||
| -rw-r--r-- | fs/fuse/dax.c | 2 | ||||
| -rw-r--r-- | fs/fuse/dev.c | 1372 | ||||
| -rw-r--r-- | fs/fuse/dev.h | 110 | ||||
| -rw-r--r-- | fs/fuse/dev_uring.c | 491 | ||||
| -rw-r--r-- | fs/fuse/dev_uring_i.h | 47 | ||||
| -rw-r--r-- | fs/fuse/dir.c | 52 | ||||
| -rw-r--r-- | fs/fuse/file.c | 220 | ||||
| -rw-r--r-- | fs/fuse/fuse_dev_i.h | 351 | ||||
| -rw-r--r-- | fs/fuse/fuse_i.h | 872 | ||||
| -rw-r--r-- | fs/fuse/fuse_trace.h | 26 | ||||
| -rw-r--r-- | fs/fuse/inode.c | 270 | ||||
| -rw-r--r-- | fs/fuse/notify.c | 444 | ||||
| -rw-r--r-- | fs/fuse/poll.c | 141 | ||||
| -rw-r--r-- | fs/fuse/readdir.c | 94 | ||||
| -rw-r--r-- | fs/fuse/req.c | 99 | ||||
| -rw-r--r-- | fs/fuse/req_timeout.c | 148 | ||||
| -rw-r--r-- | fs/fuse/sysctl.c | 1 | ||||
| -rw-r--r-- | fs/fuse/sysctl.h | 9 | ||||
| -rw-r--r-- | fs/fuse/virtio_fs.c | 23 | ||||
| -rw-r--r-- | fs/fuse/xattr.c | 4 | ||||
| -rw-r--r-- | fs/internal.h | 1 | ||||
| -rw-r--r-- | include/linux/fs/super.h | 2 |
29 files changed, 2786 insertions, 2146 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 2d7e28b98f3f..bbf2c7715e29 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10616,10 +10616,10 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/fungible/ -FUSE: FILESYSTEM IN USERSPACE +FUSE FILESYSTEM [CORE] M: Miklos Szeredi <miklos@szeredi.hu> -L: linux-fsdevel@vger.kernel.org -S: Maintained +L: fuse-devel@lists.linux.dev +S: Supported W: https://github.com/libfuse/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git F: Documentation/filesystems/fuse/* @@ -10627,6 +10627,26 @@ F: fs/fuse/ F: include/uapi/linux/fuse.h F: tools/testing/selftests/filesystems/fuse/ +FUSE FILESYSTEM [IO-URING] +M: Bernd Schubert <bernd@bsbernd.com> +M: Joanne Koong <joannelkoong@gmail.com> +L: fuse-devel@lists.linux.dev +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git +F: Documentation/filesystems/fuse/fuse-io-uring.rst +F: fs/fuse/dev_uring.c +F: fs/fuse/dev_uring_i.h + +FUSE FILESYSTEM [PASSTHROUGH] +M: Amir Goldstein <amir73il@gmail.com> +L: fuse-devel@lists.linux.dev +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git +F: Documentation/filesystems/fuse/fuse-passthrough.rst +F: fs/fuse/backing.c +F: fs/fuse/iomode.c +F: fs/fuse/passthrough.c + FUTEX SUBSYSTEM M: Thomas Gleixner <tglx@kernel.org> M: Ingo Molnar <mingo@redhat.com> diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 22ad9538dfc4..245e67852b03 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -11,7 +11,8 @@ obj-$(CONFIG_CUSE) += cuse.o obj-$(CONFIG_VIRTIO_FS) += virtiofs.o fuse-y := trace.o # put trace.o first so we see ftrace errors sooner -fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o +fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o req_timeout.o req.o +fuse-y += poll.o notify.o fuse-y += iomode.o fuse-$(CONFIG_FUSE_DAX) += dax.o fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o diff 
--git a/fs/fuse/acl.c b/fs/fuse/acl.c index cbde6ac1add3..31fb50e16aed 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * FUSE: Filesystem in Userspace * Copyright (C) 2016 Canonical Ltd. <seth.forshee@canonical.com> - * - * This program can be distributed under the terms of the GNU GPL. - * See the file COPYING. */ #include "fuse_i.h" diff --git a/fs/fuse/args.h b/fs/fuse/args.h new file mode 100644 index 000000000000..ecfe51a192af --- /dev/null +++ b/fs/fuse/args.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FS_FUSE_ARGS_H +#define _FS_FUSE_ARGS_H + +#include <linux/types.h> + +struct fuse_mount; + +/** One input argument of a request */ +struct fuse_in_arg { + unsigned size; + const void *value; +}; + +/** One output argument of a request */ +struct fuse_arg { + unsigned size; + void *value; +}; + +struct fuse_args { + u64 nodeid; + u32 opcode; + u32 uid; + u32 gid; + u32 pid; + u8 in_numargs; + u8 out_numargs; + u8 ext_idx; + bool force:1; + bool noreply:1; + bool nocreds:1; + bool in_pages:1; + bool out_pages:1; + bool user_pages:1; + bool out_argvar:1; + bool page_zeroing:1; + bool page_replace:1; + bool may_block:1; + bool is_ext:1; + bool is_pinned:1; + bool invalidate_vmap:1; + bool abort_on_kill:1; + struct fuse_in_arg in_args[4]; + struct fuse_arg out_args[2]; + void (*end)(struct fuse_args *args, int error); + /* Used for kvec iter backed by vmalloc address */ + void *vmap_base; +}; + +/** FUSE folio descriptor */ +struct fuse_folio_desc { + unsigned int length; + unsigned int offset; +}; + +struct fuse_args_pages { + struct fuse_args args; + struct folio **folios; + struct fuse_folio_desc *descs; + unsigned int num_folios; +}; + +#endif /* _FS_FUSE_ARGS_H */ diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c index d95dfa48483f..472b6afa7dff 100644 --- a/fs/fuse/backing.c +++ b/fs/fuse/backing.c @@ -5,6 +5,7 @@ * Copyright (c) 2023 CTERA Networks. 
*/ +#include "dev.h" #include "fuse_i.h" #include <linux/file.h> @@ -118,7 +119,7 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) goto out_fput; fb->file = file; - fb->cred = prepare_creds(); + fb->cred = get_current_cred(); refcount_set(&fb->count, 1); res = fuse_backing_id_alloc(fc, fb); diff --git a/fs/fuse/control.c b/fs/fuse/control.c index f902a7fb4630..21ffde596d61 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -1,12 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> - - This program can be distributed under the terms of the GNU GPL. - See the file COPYING. */ #include "fuse_i.h" +#include "dev.h" #include <linux/init.h> #include <linux/module.h> @@ -37,9 +36,7 @@ static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf, { struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (fc) { - if (fc->abort_err) - fc->aborted = true; - fuse_abort_conn(fc); + fuse_chan_abort(fc->chan, fc->abort_err); fuse_conn_put(fc); } return count; @@ -57,7 +54,7 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, if (!fc) return 0; - value = atomic_read(&fc->num_waiting); + value = fuse_chan_num_waiting(fc->chan); file->private_data = (void *)value; fuse_conn_put(fc); } @@ -111,7 +108,7 @@ static ssize_t fuse_conn_max_background_read(struct file *file, if (!fc) return 0; - val = READ_ONCE(fc->max_background); + val = fuse_chan_max_background(fc->chan); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); @@ -129,12 +126,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file, if (ret > 0) { struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (fc) { - spin_lock(&fc->bg_lock); - fc->max_background = val; - fc->blocked = fc->num_background >= fc->max_background; - if (!fc->blocked) - wake_up(&fc->blocked_waitq); - spin_unlock(&fc->bg_lock); + 
fuse_chan_max_background_set(fc->chan, val); fuse_conn_put(fc); } } diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 174333633471..3c15b5ba16d7 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -51,6 +51,7 @@ #include <linux/uio.h> #include <linux/user_namespace.h> +#include "dev.h" #include "fuse_i.h" #include "fuse_dev_i.h" @@ -306,6 +307,7 @@ struct cuse_init_args { struct cuse_init_out out; struct folio *folio; struct fuse_folio_desc desc; + struct fuse_conn *fc; }; /** @@ -319,11 +321,10 @@ struct cuse_init_args { * required data structures for it. Please read the comment at the * top of this file for high level overview. */ -static void cuse_process_init_reply(struct fuse_mount *fm, - struct fuse_args *args, int error) +static void cuse_process_init_reply(struct fuse_args *args, int error) { - struct fuse_conn *fc = fm->fc; struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args); + struct fuse_conn *fc = ia->fc; struct fuse_args_pages *ap = &ia->ap; struct cuse_conn *cc = fc_to_cc(fc), *pos; struct cuse_init_out *arg = &ia->out; @@ -391,7 +392,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm, rc = -ENOMEM; cdev = cdev_alloc(); if (!cdev) - goto err_unlock; + goto err_dev; cdev->owner = THIS_MODULE; cdev->ops = &cuse_frontend_fops; @@ -417,13 +418,15 @@ out: err_cdev: cdev_del(cdev); +err_dev: + device_del(dev); err_unlock: mutex_unlock(&cuse_lock); put_device(dev); err_region: unregister_chrdev_region(devt, 1); err: - fuse_abort_conn(fc); + fuse_chan_abort(fc->chan, false); goto out; } @@ -466,6 +469,7 @@ static int cuse_send_init(struct cuse_conn *cc) ap->descs = &ia->desc; ia->folio = folio; ia->desc.length = ap->args.out_args[1].size; + ia->fc = &cc->fc; ap->args.end = cuse_process_init_reply; rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL); @@ -502,8 +506,12 @@ static int cuse_channel_open(struct inode *inode, struct file *file) { struct fuse_dev *fud; struct cuse_conn *cc; + struct fuse_chan *fch 
__free(fuse_chan_free) = fuse_dev_chan_new(); int rc; + if (!fch) + return -ENOMEM; + /* set up cuse_conn */ cc = kzalloc_obj(*cc); if (!cc) @@ -513,18 +521,16 @@ static int cuse_channel_open(struct inode *inode, struct file *file) * Limit the cuse channel to requests that can * be represented in file->f_cred->user_ns. */ - fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns, - &fuse_dev_fiq_ops, NULL); - + fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns, no_free_ptr(fch)); cc->fc.release = cuse_fc_release; - fud = fuse_dev_alloc_install(&cc->fc); + fud = fuse_dev_alloc_install(cc->fc.chan); fuse_conn_put(&cc->fc); if (!fud) return -ENOMEM; INIT_LIST_HEAD(&cc->list); - cc->fc.initialized = 1; + cc->fc.chan->initialized = 1; rc = cuse_send_init(cc); if (rc) { fuse_dev_put(fud); @@ -549,7 +555,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) static int cuse_channel_release(struct inode *inode, struct file *file) { struct fuse_dev *fud = __fuse_get_dev(file); - struct cuse_conn *cc = fc_to_cc(fud->fc); + struct cuse_conn *cc = fc_to_cc(fud->chan->conn); /* remove from the conntbl, no more access from this point on */ mutex_lock(&cuse_lock); @@ -581,7 +587,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev, { struct cuse_conn *cc = dev_get_drvdata(dev); - return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); + return sprintf(buf, "%d\n", atomic_read(&cc->fc.chan->num_waiting)); } static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL); @@ -591,7 +597,7 @@ static ssize_t cuse_class_abort_store(struct device *dev, { struct cuse_conn *cc = dev_get_drvdata(dev); - fuse_abort_conn(&cc->fc); + fuse_chan_abort(cc->fc.chan, false); return count; } static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store); diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index f6cf00a8938c..8b53625ac7ab 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -113,7 +113,7 @@ __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long 
delay_ms) free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 1); if (fcd->nr_free_ranges < free_threshold) - queue_delayed_work(system_long_wq, &fcd->free_work, + queue_delayed_work(system_dfl_long_wq, &fcd->free_work, msecs_to_jiffies(delay_ms)); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c105aaf9ff5d..5763a7cd3b37 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1,14 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> - - This program can be distributed under the terms of the GNU GPL. - See the file COPYING. */ +#include "dev.h" +#include "args.h" #include "dev_uring_i.h" -#include "fuse_i.h" -#include "fuse_dev_i.h" #include <linux/init.h> #include <linux/module.h> @@ -30,124 +28,33 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); -static struct kmem_cache *fuse_req_cachep; - -const unsigned long fuse_timeout_timer_freq = - secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ); - -bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list) -{ - struct fuse_req *req; - - req = list_first_entry_or_null(list, struct fuse_req, list); - if (!req) - return false; - return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); -} - -static bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing) -{ - int i; - - for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) - if (fuse_request_expired(fc, &processing[i])) - return true; - - return false; -} +static DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq); -/* - * Check if any requests aren't being completed by the time the request timeout - * elapses. To do so, we: - * - check the fiq pending list - * - check the bg queue - * - check the fpq io and processing lists - * - * To make this fast, we only check against the head request on each list since - * these are generally queued in order of creation time (eg newer requests get - * queued to the tail). 
We might miss a few edge cases (eg requests transitioning - * between lists, re-sent requests at the head of the pending list having a - * later creation time than other requests on that list, etc.) but that is fine - * since if the request never gets fulfilled, it will eventually be caught. - */ -void fuse_check_timeout(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct fuse_conn *fc = container_of(dwork, struct fuse_conn, - timeout.work); - struct fuse_iqueue *fiq = &fc->iq; - struct fuse_dev *fud; - struct fuse_pqueue *fpq; - bool expired = false; - - if (!atomic_read(&fc->num_waiting)) - goto out; - - spin_lock(&fiq->lock); - expired = fuse_request_expired(fc, &fiq->pending); - spin_unlock(&fiq->lock); - if (expired) - goto abort_conn; - - spin_lock(&fc->bg_lock); - expired = fuse_request_expired(fc, &fc->bg_queue); - spin_unlock(&fc->bg_lock); - if (expired) - goto abort_conn; - - spin_lock(&fc->lock); - if (!fc->connected) { - spin_unlock(&fc->lock); - return; - } - list_for_each_entry(fud, &fc->devices, entry) { - fpq = &fud->pq; - spin_lock(&fpq->lock); - if (fuse_request_expired(fc, &fpq->io) || - fuse_fpq_processing_expired(fc, fpq->processing)) { - spin_unlock(&fpq->lock); - spin_unlock(&fc->lock); - goto abort_conn; - } - - spin_unlock(&fpq->lock); - } - spin_unlock(&fc->lock); - - if (fuse_uring_request_expired(fc)) - goto abort_conn; - -out: - queue_delayed_work(system_percpu_wq, &fc->timeout.work, - fuse_timeout_timer_freq); - return; - -abort_conn: - fuse_abort_conn(fc); -} +static struct kmem_cache *fuse_req_cachep; -static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) +static void fuse_request_init(struct fuse_chan *fch, struct fuse_req *req) { INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); refcount_set(&req->count, 1); __set_bit(FR_PENDING, &req->flags); - req->fm = fm; + req->chan = fch; req->create_time = jiffies; } -static struct 
fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) +static struct fuse_req *fuse_request_alloc(struct fuse_chan *fch, gfp_t flags) { struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); if (req) - fuse_request_init(fm, req); + fuse_request_init(fch, req); return req; } static void fuse_request_free(struct fuse_req *req) { + WARN_ON(!list_empty(&req->intr_entry)); kmem_cache_free(fuse_req_cachep, req); } @@ -162,110 +69,86 @@ static void __fuse_put_request(struct fuse_req *req) refcount_dec(&req->count); } -void fuse_set_initialized(struct fuse_conn *fc) +void fuse_chan_set_initialized(struct fuse_chan *fch, struct fuse_chan_param *param) { + if (param) { + fch->minor = param->minor; + fch->max_write = param->max_write; + fch->max_pages = param->max_pages; + } + /* Make sure stores before this are seen on another CPU */ smp_wmb(); - fc->initialized = 1; + fch->initialized = 1; + wake_up_all(&fch->blocked_waitq); } -static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) +static bool fuse_block_alloc(struct fuse_chan *fch, bool for_background) { - return !fc->initialized || (for_background && fc->blocked) || - (fc->io_uring && fc->connected && !fuse_uring_ready(fc)); + return !fch->initialized || (for_background && fch->blocked) || + (fch->io_uring && fch->connected && !fuse_uring_ready(fch)); } -static void fuse_drop_waiting(struct fuse_conn *fc) +static void fuse_drop_waiting(struct fuse_chan *fch) { /* - * lockess check of fc->connected is okay, because atomic_dec_and_test() - * provides a memory barrier matched with the one in fuse_wait_aborted() + * lockess check of fch->connected is okay, because atomic_dec_and_test() + * provides a memory barrier matched with the one in fuse_chan_wait_aborted() * to ensure no wake-up is missed. 
*/ - if (atomic_dec_and_test(&fc->num_waiting) && - !READ_ONCE(fc->connected)) { + if (atomic_dec_and_test(&fch->num_waiting) && + !READ_ONCE(fch->connected)) { /* wake up aborters */ - wake_up_all(&fc->blocked_waitq); + wake_up_all(&fch->blocked_waitq); } } static void fuse_put_request(struct fuse_req *req); -static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap, - struct fuse_mount *fm, - bool for_background) +static struct fuse_req *fuse_get_req(struct fuse_chan *fch, bool for_background) { - struct fuse_conn *fc = fm->fc; struct fuse_req *req; - bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP); - kuid_t fsuid; - kgid_t fsgid; int err; - atomic_inc(&fc->num_waiting); + atomic_inc(&fch->num_waiting); - if (fuse_block_alloc(fc, for_background)) { + if (fuse_block_alloc(fch, for_background)) { err = -EINTR; - if (wait_event_state_exclusive(fc->blocked_waitq, - !fuse_block_alloc(fc, for_background), + if (wait_event_state_exclusive(fch->blocked_waitq, + !fuse_block_alloc(fch, for_background), (TASK_KILLABLE | TASK_FREEZABLE))) goto out; } - /* Matches smp_wmb() in fuse_set_initialized() */ + + /* Matches smp_wmb() in fuse_chan_set_initialized() */ smp_rmb(); err = -ENOTCONN; - if (!fc->connected) - goto out; - - err = -ECONNREFUSED; - if (fc->conn_error) + if (!fch->connected) goto out; - req = fuse_request_alloc(fm, GFP_KERNEL); + req = fuse_request_alloc(fch, GFP_KERNEL); err = -ENOMEM; if (!req) { if (for_background) - wake_up(&fc->blocked_waitq); + wake_up(&fch->blocked_waitq); goto out; } - req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); - __set_bit(FR_WAITING, &req->flags); if (for_background) __set_bit(FR_BACKGROUND, &req->flags); - /* - * Keep the old behavior when idmappings support was not - * declared by a FUSE server. 
- * - * For those FUSE servers who support idmapped mounts, - * we send UID/GID only along with "inode creation" - * fuse requests, otherwise idmap == &invalid_mnt_idmap and - * req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID. - */ - fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns); - fsgid = no_idmap ? current_fsgid() : mapped_fsgid(idmap, fc->user_ns); - req->in.h.uid = from_kuid(fc->user_ns, fsuid); - req->in.h.gid = from_kgid(fc->user_ns, fsgid); - - if (no_idmap && unlikely(req->in.h.uid == ((uid_t)-1) || - req->in.h.gid == ((gid_t)-1))) { - fuse_put_request(req); - return ERR_PTR(-EOVERFLOW); - } - return req; out: - fuse_drop_waiting(fc); + fuse_drop_waiting(fch); return ERR_PTR(err); } static void fuse_put_request(struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; + struct fuse_chan *fch = req->chan; if (refcount_dec_and_test(&req->count)) { if (test_bit(FR_BACKGROUND, &req->flags)) { @@ -273,15 +156,15 @@ static void fuse_put_request(struct fuse_req *req) * We get here in the unlikely case that a background * request was allocated but not sent */ - spin_lock(&fc->bg_lock); - if (!fc->blocked) - wake_up(&fc->blocked_waitq); - spin_unlock(&fc->bg_lock); + spin_lock(&fch->bg_lock); + if (!fch->blocked) + wake_up(&fch->blocked_waitq); + spin_unlock(&fch->bg_lock); } if (test_bit(FR_WAITING, &req->flags)) { __clear_bit(FR_WAITING, &req->flags); - fuse_drop_waiting(fc); + fuse_drop_waiting(fch); } fuse_request_free(req); @@ -335,6 +218,11 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); } +struct fuse_forget_link *fuse_alloc_forget(void) +{ + return kzalloc_obj(struct fuse_forget_link, GFP_KERNEL_ACCOUNT); +} + void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget) { @@ -406,12 +294,271 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) } } -const struct fuse_iqueue_ops fuse_dev_fiq_ops = { +static const struct fuse_iqueue_ops fuse_dev_fiq_ops = { .send_forget = 
fuse_dev_queue_forget, .send_interrupt = fuse_dev_queue_interrupt, .send_req = fuse_dev_queue_req, }; -EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); + +void fuse_iqueue_init(struct fuse_iqueue *fiq, const struct fuse_iqueue_ops *ops, void *priv) +{ + spin_lock_init(&fiq->lock); + init_waitqueue_head(&fiq->waitq); + INIT_LIST_HEAD(&fiq->pending); + INIT_LIST_HEAD(&fiq->interrupts); + fiq->forget_list_tail = &fiq->forget_list_head; + fiq->connected = 1; + fiq->ops = ops; + fiq->priv = priv; +} +EXPORT_SYMBOL_GPL(fuse_iqueue_init); + +void fuse_chan_release(struct fuse_chan *fch) +{ + struct fuse_iqueue *fiq = &fch->iq; + + if (fiq->ops->release) + fiq->ops->release(fiq); + + if (fch->timeout.req_timeout) + cancel_delayed_work_sync(&fch->timeout.work); +} + +void fuse_chan_free(struct fuse_chan *fch) +{ + WARN_ON(!list_empty(&fch->devices)); + kfree(fch->pq_prealloc); + kfree(fch); +} +EXPORT_SYMBOL_GPL(fuse_chan_free); + +struct fuse_chan *fuse_chan_new(void) +{ + struct fuse_chan *fch = kzalloc_obj(struct fuse_chan); + if (!fch) + return NULL; + + spin_lock_init(&fch->lock); + INIT_LIST_HEAD(&fch->devices); + spin_lock_init(&fch->bg_lock); + INIT_LIST_HEAD(&fch->bg_queue); + init_waitqueue_head(&fch->blocked_waitq); + atomic_set(&fch->num_waiting, 0); + fch->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fch->initialized = 0; + fch->blocked = 0; + fch->connected = 1; + fch->timeout.req_timeout = 0; + + return fch; +} +EXPORT_SYMBOL_GPL(fuse_chan_new); + +struct list_head *fuse_pqueue_alloc(void) +{ + struct list_head *pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE); + + if (pq) { + for (int i = 0; i < FUSE_PQ_HASH_SIZE; i++) + INIT_LIST_HEAD(&pq[i]); + } + return pq; +} + +struct fuse_chan *fuse_dev_chan_new(void) +{ + struct fuse_chan *fch __free(kfree) = fuse_chan_new(); + if (!fch) + return NULL; + + fch->pq_prealloc = fuse_pqueue_alloc(); + if (!fch->pq_prealloc) + return NULL; + + fuse_iqueue_init(&fch->iq, &fuse_dev_fiq_ops, NULL); + + return no_free_ptr(fch); 
+} +EXPORT_SYMBOL_GPL(fuse_dev_chan_new); + +unsigned int fuse_chan_num_background(struct fuse_chan *fch) +{ + return READ_ONCE(fch->num_background); +} + +unsigned int fuse_chan_max_background(struct fuse_chan *fch) +{ + return READ_ONCE(fch->max_background); +} + +void fuse_chan_max_background_set(struct fuse_chan *fch, unsigned int val) +{ + spin_lock(&fch->bg_lock); + fch->max_background = val; + fch->blocked = fch->num_background >= fch->max_background; + if (!fch->blocked) + wake_up(&fch->blocked_waitq); + spin_unlock(&fch->bg_lock); +} + +unsigned int fuse_chan_num_waiting(struct fuse_chan *fch) +{ + return atomic_read(&fch->num_waiting); +} + +void fuse_chan_set_fc(struct fuse_chan *fch, struct fuse_conn *fc) +{ + fch->conn = fc; +} + +void fuse_chan_io_uring_enable(struct fuse_chan *fch) +{ + fch->io_uring = 1; +} + +void fuse_pqueue_init(struct fuse_pqueue *fpq) +{ + spin_lock_init(&fpq->lock); + INIT_LIST_HEAD(&fpq->io); + fpq->connected = 1; + fpq->processing = NULL; +} + +static struct fuse_dev *fuse_dev_alloc_no_pq(void) +{ + struct fuse_dev *fud; + + fud = kzalloc_obj(struct fuse_dev); + if (!fud) + return NULL; + + refcount_set(&fud->ref, 1); + fuse_pqueue_init(&fud->pq); + + return fud; +} + +struct fuse_dev *fuse_dev_alloc(void) +{ + struct fuse_dev *fud __free(kfree) = fuse_dev_alloc_no_pq(); + if (!fud) + return NULL; + + fud->pq.processing = fuse_pqueue_alloc(); + if (!fud->pq.processing) + return NULL; + + return no_free_ptr(fud); +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc); + +/* + * Installs @fch into @fud, return true on success. "Consumes" @pq in either case. 
+ */ +static bool fuse_dev_install_with_pq(struct fuse_dev *fud, struct fuse_chan *fch, + struct list_head *pq) +{ + struct fuse_chan *old_fch; + + guard(spinlock)(&fch->lock); + /* + * Pairs with: + * - xchg() in fuse_dev_release() + * - smp_load_acquire() in fuse_dev_fc_get() + */ + old_fch = cmpxchg(&fud->chan, NULL, fch); + if (old_fch) { + /* + * failed to set fud->chan because + * - it was already set to a different fc + * - it was set to disconneted + */ + kfree(pq); + return false; + } + if (pq) { + WARN_ON(fud->pq.processing); + fud->pq.processing = pq; + } + list_add_tail(&fud->entry, &fch->devices); + fuse_conn_get(fch->conn); + wake_up_all(&fuse_dev_waitq); + return true; +} + +void fuse_dev_install(struct fuse_dev *fud, struct fuse_chan *fch) +{ + struct list_head *pq = fch->pq_prealloc; + + fch->pq_prealloc = NULL; + if (!fuse_dev_install_with_pq(fud, fch, pq)) { + /* Channel is not usable without a dev */ + fuse_chan_abort(fch, false); + } +} +EXPORT_SYMBOL_GPL(fuse_dev_install); + +struct fuse_dev *fuse_dev_alloc_install(struct fuse_chan *fch) +{ + struct fuse_dev *fud; + + fud = fuse_dev_alloc_no_pq(); + if (!fud) + return NULL; + + fuse_dev_install(fud, fch); + return fud; +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); + +void fuse_dev_put(struct fuse_dev *fud) +{ + struct fuse_chan *fch; + + if (!refcount_dec_and_test(&fud->ref)) + return; + + fch = fuse_dev_chan_get(fud); + if (fch && fch != FUSE_DEV_CHAN_DISCONNECTED) { + /* This is the virtiofs case (fuse_dev_release() not called) */ + spin_lock(&fch->lock); + list_del(&fud->entry); + spin_unlock(&fch->lock); + + fuse_conn_put(fch->conn); + } + kfree(fud->pq.processing); + kfree(fud); +} +EXPORT_SYMBOL_GPL(fuse_dev_put); + +bool fuse_dev_is_installed(struct fuse_dev *fud) +{ + struct fuse_chan *fch = fuse_dev_chan_get(fud); + + return fch != NULL && fch != FUSE_DEV_CHAN_DISCONNECTED; +} + +/* + * Checks if @fc matches the one installed in @fud + */ +bool fuse_dev_verify(struct fuse_dev *fud, 
struct fuse_chan *fch) +{ + return fuse_dev_chan_get(fud) == fch; +} + +bool fuse_dev_is_sync_init(struct fuse_dev *fud) +{ + return fud->sync_init; +} + +struct fuse_dev *fuse_dev_grab(struct file *file) +{ + struct fuse_dev *fud = fuse_file_to_fud(file); + + refcount_inc(&fud->ref); + return fud; +} static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) { @@ -421,10 +568,10 @@ static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) fiq->ops->send_req(fiq, req); } -void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, - u64 nodeid, u64 nlookup) +void fuse_chan_queue_forget(struct fuse_chan *fch, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; @@ -432,21 +579,44 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, fiq->ops->send_forget(fiq, forget); } -static void flush_bg_queue(struct fuse_conn *fc) +static void flush_bg_queue(struct fuse_chan *fch) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; - while (fc->active_background < fc->max_background && - !list_empty(&fc->bg_queue)) { + while (fch->active_background < fch->max_background && + !list_empty(&fch->bg_queue)) { struct fuse_req *req; - req = list_first_entry(&fc->bg_queue, struct fuse_req, list); + req = list_first_entry(&fch->bg_queue, struct fuse_req, list); list_del(&req->list); - fc->active_background++; + fch->active_background++; fuse_send_one(fiq, req); } } +void fuse_request_bg_finish(struct fuse_chan *fch, struct fuse_req *req) +{ + lockdep_assert_held(&fch->bg_lock); + + clear_bit(FR_BACKGROUND, &req->flags); + if (fch->num_background == fch->max_background) { + fch->blocked = 0; + wake_up(&fch->blocked_waitq); + } else if (!fch->blocked) { + /* + * Wake up next waiter, if any. 
It's okay to use + * waitqueue_active(), as we've already synced up + * fch->blocked with waiters with the wake_up() call + * above. + */ + if (waitqueue_active(&fch->blocked_waitq)) + wake_up(&fch->blocked_waitq); + } + + fch->num_background--; + fch->active_background--; +} + /* * This function is called when a request is finished. Either a reply * has arrived or it was aborted (and not yet sent) or some error @@ -457,9 +627,8 @@ static void flush_bg_queue(struct fuse_conn *fc) */ void fuse_request_end(struct fuse_req *req) { - struct fuse_mount *fm = req->fm; - struct fuse_conn *fc = fm->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = req->chan; + struct fuse_iqueue *fiq = &fch->iq; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; @@ -478,33 +647,17 @@ void fuse_request_end(struct fuse_req *req) WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags)); if (test_bit(FR_BACKGROUND, &req->flags)) { - spin_lock(&fc->bg_lock); - clear_bit(FR_BACKGROUND, &req->flags); - if (fc->num_background == fc->max_background) { - fc->blocked = 0; - wake_up(&fc->blocked_waitq); - } else if (!fc->blocked) { - /* - * Wake up next waiter, if any. It's okay to use - * waitqueue_active(), as we've already synced up - * fc->blocked with waiters with the wake_up() call - * above. 
- */ - if (waitqueue_active(&fc->blocked_waitq)) - wake_up(&fc->blocked_waitq); - } - - fc->num_background--; - fc->active_background--; - flush_bg_queue(fc); - spin_unlock(&fc->bg_lock); + spin_lock(&fch->bg_lock); + fuse_request_bg_finish(fch, req); + flush_bg_queue(fch); + spin_unlock(&fch->bg_lock); } else { /* Wake up waiter sleeping in request_wait_answer() */ wake_up(&req->waitq); } if (test_bit(FR_ASYNC, &req->flags)) - req->args->end(fm, req->args, req->out.h.error); + req->args->end(req->args, req->out.h.error); put_request: fuse_put_request(req); } @@ -512,7 +665,7 @@ EXPORT_SYMBOL_GPL(fuse_request_end); static int queue_interrupt(struct fuse_req *req) { - struct fuse_iqueue *fiq = &req->fm->fc->iq; + struct fuse_iqueue *fiq = &req->chan->iq; /* Check for we've sent request to interrupt this req */ if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) @@ -543,11 +696,11 @@ bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock) static void request_wait_answer(struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = req->chan; + struct fuse_iqueue *fiq = &fch->iq; int err; - if (!fc->no_interrupt) { + if (!fch->no_interrupt) { /* Any signal may interrupt this */ err = wait_event_interruptible(req->waitq, test_bit(FR_FINISHED, &req->flags)); @@ -571,7 +724,7 @@ static void request_wait_answer(struct fuse_req *req) return; if (req->args->abort_on_kill) { - fuse_abort_conn(fc); + fuse_chan_abort(fch, false); return; } @@ -592,7 +745,7 @@ static void request_wait_answer(struct fuse_req *req) static void __fuse_request_send(struct fuse_req *req) { - struct fuse_iqueue *fiq = &req->fm->fc->iq; + struct fuse_iqueue *fiq = &req->chan->iq; BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); @@ -606,12 +759,12 @@ static void __fuse_request_send(struct fuse_req *req) smp_rmb(); } -static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) +static void 
fuse_adjust_compat(struct fuse_chan *fch, struct fuse_args *args) { - if (fc->minor < 4 && args->opcode == FUSE_STATFS) + if (fch->minor < 4 && args->opcode == FUSE_STATFS) args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE; - if (fc->minor < 9) { + if (fch->minor < 9) { switch (args->opcode) { case FUSE_LOOKUP: case FUSE_CREATE: @@ -627,7 +780,7 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) break; } } - if (fc->minor < 12) { + if (fch->minor < 12) { switch (args->opcode) { case FUSE_CREATE: args->in_args[0].size = sizeof(struct fuse_open_in); @@ -639,25 +792,13 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) } } -static void fuse_force_creds(struct fuse_req *req) -{ - struct fuse_conn *fc = req->fm->fc; - - if (!req->fm->sb || req->fm->sb->s_iflags & SB_I_NOIDMAP) { - req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); - req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); - } else { - req->in.h.uid = FUSE_INVALID_UIDGID; - req->in.h.gid = FUSE_INVALID_UIDGID; - } - - req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); -} - static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) { req->in.h.opcode = args->opcode; req->in.h.nodeid = args->nodeid; + req->in.h.uid = args->uid; + req->in.h.gid = args->gid; + req->in.h.pid = args->pid; req->args = args; if (args->is_ext) req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8; @@ -665,33 +806,26 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) __set_bit(FR_ASYNC, &req->flags); } -ssize_t __fuse_simple_request(struct mnt_idmap *idmap, - struct fuse_mount *fm, - struct fuse_args *args) +ssize_t fuse_chan_send(struct fuse_chan *fch, struct fuse_args *args) { - struct fuse_conn *fc = fm->fc; struct fuse_req *req; ssize_t ret; if (args->force) { - atomic_inc(&fc->num_waiting); - req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL); - - if (!args->nocreds) - 
fuse_force_creds(req); + atomic_inc(&fch->num_waiting); + req = fuse_request_alloc(fch, GFP_KERNEL | __GFP_NOFAIL); __set_bit(FR_WAITING, &req->flags); if (!args->abort_on_kill) __set_bit(FR_FORCE, &req->flags); } else { - WARN_ON(args->nocreds); - req = fuse_get_req(idmap, fm, false); + req = fuse_get_req(fch, false); if (IS_ERR(req)) return PTR_ERR(req); } - /* Needs to be done after fuse_get_req() so that fc->minor is valid */ - fuse_adjust_compat(fc, args); + /* Needs to be done after fuse_get_req() so that fch->minor is valid */ + fuse_adjust_compat(fch, args); fuse_args_to_req(req, args); if (!args->noreply) @@ -708,10 +842,9 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, } #ifdef CONFIG_FUSE_IO_URING -static bool fuse_request_queue_background_uring(struct fuse_conn *fc, - struct fuse_req *req) +static bool fuse_request_queue_background_uring(struct fuse_req *req) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &req->chan->iq; req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, @@ -727,50 +860,46 @@ static bool fuse_request_queue_background_uring(struct fuse_conn *fc, */ static int fuse_request_queue_background(struct fuse_req *req) { - struct fuse_mount *fm = req->fm; - struct fuse_conn *fc = fm->fc; + struct fuse_chan *fch = req->chan; bool queued = false; WARN_ON(!test_bit(FR_BACKGROUND, &req->flags)); if (!test_bit(FR_WAITING, &req->flags)) { __set_bit(FR_WAITING, &req->flags); - atomic_inc(&fc->num_waiting); + atomic_inc(&fch->num_waiting); } __set_bit(FR_ISREPLY, &req->flags); #ifdef CONFIG_FUSE_IO_URING - if (fuse_uring_ready(fc)) - return fuse_request_queue_background_uring(fc, req); + if (fuse_uring_ready(fch)) + return fuse_request_queue_background_uring(req); #endif - spin_lock(&fc->bg_lock); - if (likely(fc->connected)) { - fc->num_background++; - if (fc->num_background == fc->max_background) - fc->blocked = 1; - list_add_tail(&req->list, &fc->bg_queue); - flush_bg_queue(fc); + 
spin_lock(&fch->bg_lock); + if (likely(fch->connected)) { + fch->num_background++; + if (fch->num_background == fch->max_background) + fch->blocked = 1; + list_add_tail(&req->list, &fch->bg_queue); + flush_bg_queue(fch); queued = true; } - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); return queued; } -int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, - gfp_t gfp_flags) +int fuse_chan_send_bg(struct fuse_chan *fch, struct fuse_args *args, gfp_t gfp_flags) { struct fuse_req *req; if (args->force) { - WARN_ON(!args->nocreds); - req = fuse_request_alloc(fm, gfp_flags); + req = fuse_request_alloc(fch, gfp_flags); if (!req) return -ENOMEM; __set_bit(FR_BACKGROUND, &req->flags); } else { - WARN_ON(args->nocreds); - req = fuse_get_req(&invalid_mnt_idmap, fm, true); + req = fuse_get_req(fch, true); if (IS_ERR(req)) return PTR_ERR(req); } @@ -784,15 +913,13 @@ int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, return 0; } -EXPORT_SYMBOL_GPL(fuse_simple_background); -static int fuse_simple_notify_reply(struct fuse_mount *fm, - struct fuse_args *args, u64 unique) +int fuse_chan_send_notify_reply(struct fuse_chan *fch, struct fuse_args *args, u64 unique) { struct fuse_req *req; - struct fuse_iqueue *fiq = &fm->fc->iq; + struct fuse_iqueue *fiq = &fch->iq; - req = fuse_get_req(&invalid_mnt_idmap, fm, false); + req = fuse_get_req(fch, false); if (IS_ERR(req)) return PTR_ERR(req); @@ -1037,6 +1164,10 @@ static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop if (WARN_ON(folio_test_mlocked(oldfolio))) goto out_fallback_unlock; + err = lock_request(cs->req); + if (err) + goto out_fallback_unlock; + replace_page_cache_folio(oldfolio, newfolio); folio_get(newfolio); @@ -1050,20 +1181,7 @@ static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop */ pipe_buf_release(cs->pipe, buf); - err = 0; - spin_lock(&cs->req->waitq.lock); - if (test_bit(FR_ABORTED, &cs->req->flags)) - err = 
-ENOENT; - else - *foliop = newfolio; - spin_unlock(&cs->req->waitq.lock); - - if (err) { - folio_unlock(newfolio); - folio_put(newfolio); - goto out_put_old; - } - + *foliop = newfolio; folio_unlock(oldfolio); /* Drop ref for ap->pages[] array */ folio_put(oldfolio); @@ -1115,15 +1233,15 @@ static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio, cs->nr_segs++; cs->len = 0; - return 0; + return lock_request(cs->req); } /* * Copy a folio in the request to/from the userspace buffer. Must be * done atomically */ -static int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop, - unsigned offset, unsigned count, int zeroing) +int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop, + unsigned offset, unsigned count, int zeroing) { int err; struct folio *folio = *foliop; @@ -1204,7 +1322,7 @@ static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes, } /* Copy a single argument in the request to/from userspace buffer */ -static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) +int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) { while (size) { if (!cs->len) { @@ -1254,34 +1372,29 @@ static int request_pending(struct fuse_iqueue *fiq) * * Called with fiq->lock held, releases it */ -static int fuse_read_interrupt(struct fuse_iqueue *fiq, - struct fuse_copy_state *cs, - size_t nbytes, struct fuse_req *req) +static int fuse_read_interrupt(struct fuse_iqueue *fiq, struct fuse_copy_state *cs) __releases(fiq->lock) { - struct fuse_in_header ih; - struct fuse_interrupt_in arg; - unsigned reqsize = sizeof(ih) + sizeof(arg); + struct fuse_req *req = list_first_entry(&fiq->interrupts, struct fuse_req, intr_entry); + struct fuse_interrupt_in arg = { + .unique = req->in.h.unique, + }; + struct fuse_in_header ih = { + .opcode = FUSE_INTERRUPT, + .unique = (req->in.h.unique | FUSE_INT_REQ_BIT), + .len = sizeof(ih) + sizeof(arg), + }; int err; list_del_init(&req->intr_entry); - 
memset(&ih, 0, sizeof(ih)); - memset(&arg, 0, sizeof(arg)); - ih.len = reqsize; - ih.opcode = FUSE_INTERRUPT; - ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT); - arg.unique = req->in.h.unique; - spin_unlock(&fiq->lock); - if (nbytes < reqsize) - return -EINVAL; err = fuse_copy_one(cs, &ih, sizeof(ih)); if (!err) err = fuse_copy_one(cs, &arg, sizeof(arg)); fuse_copy_finish(cs); - return err ? err : reqsize; + return err ? err : ih.len; } static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, @@ -1307,8 +1420,7 @@ static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, } static int fuse_read_single_forget(struct fuse_iqueue *fiq, - struct fuse_copy_state *cs, - size_t nbytes) + struct fuse_copy_state *cs) __releases(fiq->lock) { int err; @@ -1325,8 +1437,6 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); kfree(forget); - if (nbytes < ih.len) - return -EINVAL; err = fuse_copy_one(cs, &ih, sizeof(ih)); if (!err) @@ -1354,11 +1464,6 @@ __releases(fiq->lock) .len = sizeof(ih) + sizeof(arg), }; - if (nbytes < ih.len) { - spin_unlock(&fiq->lock); - return -EINVAL; - } - max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); head = fuse_dequeue_forget(fiq, max_forgets, &count); spin_unlock(&fiq->lock); @@ -1388,13 +1493,13 @@ __releases(fiq->lock) return ih.len; } -static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, +static int fuse_read_forget(struct fuse_chan *fch, struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) __releases(fiq->lock) { - if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) - return fuse_read_single_forget(fiq, cs, nbytes); + if (fch->minor < 16 || fiq->forget_list_head.next->next == NULL) + return fuse_read_single_forget(fiq, cs); else return fuse_read_batch_forget(fiq, cs, nbytes); } @@ -1412,8 +1517,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, struct fuse_copy_state *cs, size_t nbytes) { ssize_t err; - struct 
fuse_conn *fc = fud->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = fud->chan; + struct fuse_iqueue *fiq = &fch->iq; struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_args *args; @@ -1435,7 +1540,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in) + - fc->max_write)) + fch->max_write)) return -EINVAL; restart: @@ -1454,19 +1559,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, } if (!fiq->connected) { - err = fc->aborted ? -ECONNABORTED : -ENODEV; + err = fch->abort_with_err ? -ECONNABORTED : -ENODEV; goto err_unlock; } - if (!list_empty(&fiq->interrupts)) { - req = list_entry(fiq->interrupts.next, struct fuse_req, - intr_entry); - return fuse_read_interrupt(fiq, cs, nbytes, req); - } + if (!list_empty(&fiq->interrupts)) + return fuse_read_interrupt(fiq, cs); if (forget_pending(fiq)) { if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0) - return fuse_read_forget(fc, fiq, cs, nbytes); + return fuse_read_forget(fch, fiq, cs, nbytes); if (fiq->forget_batch <= -8) fiq->forget_batch = 16; @@ -1492,12 +1594,11 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, spin_lock(&fpq->lock); /* * Must not put request on fpq->io queue after having been shut down by - * fuse_abort_conn() + * fuse_chan_abort() */ if (!fpq->connected) { req->out.h.error = err = -ECONNABORTED; goto out_end; - } list_add(&req->list, &fpq->io); spin_unlock(&fpq->lock); @@ -1510,7 +1611,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) { - err = fc->aborted ? -ECONNABORTED : -ENODEV; + err = fch->abort_with_err ? 
-ECONNABORTED : -ENODEV; goto out_end; } if (err) { @@ -1525,6 +1626,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, list_move_tail(&req->list, &fpq->processing[hash]); __fuse_get_request(req); set_bit(FR_SENT, &req->flags); + trace_fuse_request_sent(req); spin_unlock(&fpq->lock); /* matches barrier in request_wait_answer() */ smp_mb__after_atomic(); @@ -1548,7 +1650,7 @@ out_end: static int fuse_dev_open(struct inode *inode, struct file *file) { - struct fuse_dev *fud = fuse_dev_alloc(); + struct fuse_dev *fud = fuse_dev_alloc_no_pq(); if (!fud) return -ENOMEM; @@ -1562,9 +1664,15 @@ struct fuse_dev *fuse_get_dev(struct file *file) struct fuse_dev *fud = fuse_file_to_fud(file); int err; - err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_fc_get(fud) != NULL); - if (err) - return ERR_PTR(err); + if (unlikely(!fuse_dev_chan_get(fud))) { + /* only block waiting for mount if sync init was requested */ + if (!fud->sync_init) + return ERR_PTR(-EPERM); + + err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_chan_get(fud) != NULL); + if (err) + return ERR_PTR(err); + } return fud; } @@ -1636,355 +1744,6 @@ out: return ret; } -static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_poll_wakeup_out outarg; - int err; - - if (size != sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - fuse_copy_finish(cs); - return fuse_notify_poll_wakeup(fc, &outarg); -} - -static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_inval_inode_out outarg; - int err; - - if (size != sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - fuse_copy_finish(cs); - - down_read(&fc->killsb); - err = fuse_reverse_inval_inode(fc, outarg.ino, - outarg.off, outarg.len); - up_read(&fc->killsb); - return 
err; -} - -static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_inval_entry_out outarg; - int err; - char *buf; - struct qstr name; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - if (outarg.namelen > fc->name_max) - return -ENAMETOOLONG; - - err = -EINVAL; - if (size != sizeof(outarg) + outarg.namelen + 1) - return -EINVAL; - - buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - name.name = buf; - name.len = outarg.namelen; - err = fuse_copy_one(cs, buf, outarg.namelen + 1); - if (err) - goto err; - fuse_copy_finish(cs); - buf[outarg.namelen] = 0; - - down_read(&fc->killsb); - err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags); - up_read(&fc->killsb); -err: - kfree(buf); - return err; -} - -static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_delete_out outarg; - int err; - char *buf; - struct qstr name; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - if (outarg.namelen > fc->name_max) - return -ENAMETOOLONG; - - if (size != sizeof(outarg) + outarg.namelen + 1) - return -EINVAL; - - buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - name.name = buf; - name.len = outarg.namelen; - err = fuse_copy_one(cs, buf, outarg.namelen + 1); - if (err) - goto err; - fuse_copy_finish(cs); - buf[outarg.namelen] = 0; - - down_read(&fc->killsb); - err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0); - up_read(&fc->killsb); -err: - kfree(buf); - return err; -} - -static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_store_out outarg; - struct inode *inode; - struct address_space *mapping; - u64 
nodeid; - int err; - unsigned int num; - loff_t file_size; - loff_t pos; - loff_t end; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - if (size - sizeof(outarg) != outarg.size) - return -EINVAL; - - if (outarg.offset >= MAX_LFS_FILESIZE) - return -EINVAL; - - nodeid = outarg.nodeid; - pos = outarg.offset; - num = min(outarg.size, MAX_LFS_FILESIZE - pos); - - down_read(&fc->killsb); - - err = -ENOENT; - inode = fuse_ilookup(fc, nodeid, NULL); - if (!inode) - goto out_up_killsb; - if (!S_ISREG(inode->i_mode)) { - err = -EINVAL; - goto out_iput; - } - - mapping = inode->i_mapping; - file_size = i_size_read(inode); - end = pos + num; - if (end > file_size) { - file_size = end; - fuse_write_update_attr(inode, file_size, num); - } - - while (num) { - struct folio *folio; - unsigned int folio_offset; - unsigned int nr_bytes; - pgoff_t index = pos >> PAGE_SHIFT; - - folio = filemap_grab_folio(mapping, index); - err = PTR_ERR(folio); - if (IS_ERR(folio)) - goto out_iput; - - folio_offset = offset_in_folio(folio, pos); - nr_bytes = min(num, folio_size(folio) - folio_offset); - - err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0); - if (!folio_test_uptodate(folio) && !err && folio_offset == 0 && - (nr_bytes == folio_size(folio) || file_size == end)) { - folio_zero_segment(folio, nr_bytes, folio_size(folio)); - folio_mark_uptodate(folio); - } - folio_unlock(folio); - folio_put(folio); - - if (err) - goto out_iput; - - pos += nr_bytes; - num -= nr_bytes; - } - - err = 0; - -out_iput: - iput(inode); -out_up_killsb: - up_read(&fc->killsb); - return err; -} - -struct fuse_retrieve_args { - struct fuse_args_pages ap; - struct fuse_notify_retrieve_in inarg; -}; - -static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args, - int error) -{ - struct fuse_retrieve_args *ra = - container_of(args, typeof(*ra), ap.args); - - release_pages(ra->ap.folios, ra->ap.num_folios); - 
kfree(ra); -} - -static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, - struct fuse_notify_retrieve_out *outarg) -{ - int err; - struct address_space *mapping = inode->i_mapping; - loff_t file_size; - unsigned int num; - unsigned int offset; - size_t total_len = 0; - unsigned int num_pages; - struct fuse_conn *fc = fm->fc; - struct fuse_retrieve_args *ra; - size_t args_size = sizeof(*ra); - struct fuse_args_pages *ap; - struct fuse_args *args; - loff_t pos = outarg->offset; - - offset = offset_in_page(pos); - file_size = i_size_read(inode); - - num = min(outarg->size, fc->max_write); - if (pos > file_size) - num = 0; - else if (num > file_size - pos) - num = file_size - pos; - - num_pages = DIV_ROUND_UP(num + offset, PAGE_SIZE); - num_pages = min(num_pages, fc->max_pages); - num = min(num, num_pages << PAGE_SHIFT); - - args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0])); - - ra = kzalloc(args_size, GFP_KERNEL); - if (!ra) - return -ENOMEM; - - ap = &ra->ap; - ap->folios = (void *) (ra + 1); - ap->descs = (void *) (ap->folios + num_pages); - - args = &ap->args; - args->nodeid = outarg->nodeid; - args->opcode = FUSE_NOTIFY_REPLY; - args->in_numargs = 3; - args->in_pages = true; - args->end = fuse_retrieve_end; - - while (num && ap->num_folios < num_pages) { - struct folio *folio; - unsigned int folio_offset; - unsigned int nr_bytes; - pgoff_t index = pos >> PAGE_SHIFT; - - folio = filemap_get_folio(mapping, index); - if (IS_ERR(folio)) - break; - if (!folio_test_uptodate(folio)) { - folio_put(folio); - break; - } - - folio_offset = offset_in_folio(folio, pos); - nr_bytes = min(folio_size(folio) - folio_offset, num); - - ap->folios[ap->num_folios] = folio; - ap->descs[ap->num_folios].offset = folio_offset; - ap->descs[ap->num_folios].length = nr_bytes; - ap->num_folios++; - - pos += nr_bytes; - num -= nr_bytes; - total_len += nr_bytes; - } - ra->inarg.offset = outarg->offset; - ra->inarg.size = total_len; - fuse_set_zero_arg0(args); 
- args->in_args[1].size = sizeof(ra->inarg); - args->in_args[1].value = &ra->inarg; - args->in_args[2].size = total_len; - - err = fuse_simple_notify_reply(fm, args, outarg->notify_unique); - if (err) - fuse_retrieve_end(fm, args, err); - - return err; -} - -static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_retrieve_out outarg; - struct fuse_mount *fm; - struct inode *inode; - u64 nodeid; - int err; - - if (size != sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - fuse_copy_finish(cs); - - if (outarg.offset >= MAX_LFS_FILESIZE) - return -EINVAL; - - down_read(&fc->killsb); - err = -ENOENT; - nodeid = outarg.nodeid; - - inode = fuse_ilookup(fc, nodeid, &fm); - if (inode) { - if (!S_ISREG(inode->i_mode)) - err = -EINVAL; - else - err = fuse_retrieve(fm, inode, &outarg); - iput(inode); - } - up_read(&fc->killsb); - - return err; -} - /* * Resending all processing queue requests. * @@ -1998,21 +1757,21 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, * if the FUSE daemon takes careful measures to avoid processing duplicated * non-idempotent requests. 
*/ -static void fuse_resend(struct fuse_conn *fc) +void fuse_chan_resend(struct fuse_chan *fch) { struct fuse_dev *fud; struct fuse_req *req, *next; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; LIST_HEAD(to_queue); unsigned int i; - spin_lock(&fc->lock); - if (!fc->connected) { - spin_unlock(&fc->lock); + spin_lock(&fch->lock); + if (!fch->connected) { + spin_unlock(&fch->lock); return; } - list_for_each_entry(fud, &fc->devices, entry) { + list_for_each_entry(fud, &fch->devices, entry) { struct fuse_pqueue *fpq = &fud->pq; spin_lock(&fpq->lock); @@ -2020,7 +1779,7 @@ static void fuse_resend(struct fuse_conn *fc) list_splice_tail_init(&fpq->processing[i], &to_queue); spin_unlock(&fpq->lock); } - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); list_for_each_entry_safe(req, next, &to_queue, list) { set_bit(FR_PENDING, &req->flags); @@ -2037,111 +1796,17 @@ static void fuse_resend(struct fuse_conn *fc) fuse_dev_end_requests(&to_queue); return; } - /* iq and pq requests are both oldest to newest */ - list_splice(&to_queue, &fiq->pending); - fuse_dev_wake_and_unlock(fiq); -} - -static int fuse_notify_resend(struct fuse_conn *fc) -{ - fuse_resend(fc); - return 0; -} - -/* - * Increments the fuse connection epoch. This will result of dentries from - * previous epochs to be invalidated. Additionally, if inval_wq is set, a work - * queue is scheduled to trigger the invalidation. 
- */ -static int fuse_notify_inc_epoch(struct fuse_conn *fc) -{ - atomic_inc(&fc->epoch); - if (inval_wq) - schedule_work(&fc->epoch_work); - - return 0; -} - -static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_prune_out outarg; - const unsigned int batch = 512; - u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL); - unsigned int num, i; - int err; - - if (!nodeids) - return -ENOMEM; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - if (size - sizeof(outarg) != outarg.count * sizeof(u64)) - return -EINVAL; - - for (; outarg.count; outarg.count -= num) { - num = min(batch, outarg.count); - err = fuse_copy_one(cs, nodeids, num * sizeof(u64)); - if (err) - return err; - - scoped_guard(rwsem_read, &fc->killsb) { - for (i = 0; i < num; i++) - fuse_try_prune_one_inode(fc, nodeids[i]); - } - } - return 0; -} - -static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, - unsigned int size, struct fuse_copy_state *cs) -{ /* - * Only allow notifications during while the connection is in an - * initialized and connected state + * Remove interrupt entries for resent requests to prevent stale + * intr_entry on fiq->interrupts after the request is re-queued. 
*/ - if (!fc->initialized || !fc->connected) - return -EINVAL; - - /* Don't try to move folios (yet) */ - cs->move_folios = false; - - switch (code) { - case FUSE_NOTIFY_POLL: - return fuse_notify_poll(fc, size, cs); - - case FUSE_NOTIFY_INVAL_INODE: - return fuse_notify_inval_inode(fc, size, cs); - - case FUSE_NOTIFY_INVAL_ENTRY: - return fuse_notify_inval_entry(fc, size, cs); - - case FUSE_NOTIFY_STORE: - return fuse_notify_store(fc, size, cs); - - case FUSE_NOTIFY_RETRIEVE: - return fuse_notify_retrieve(fc, size, cs); - - case FUSE_NOTIFY_DELETE: - return fuse_notify_delete(fc, size, cs); - - case FUSE_NOTIFY_RESEND: - return fuse_notify_resend(fc); - - case FUSE_NOTIFY_INC_EPOCH: - return fuse_notify_inc_epoch(fc); - - case FUSE_NOTIFY_PRUNE: - return fuse_notify_prune(fc, size, cs); - - default: - return -EINVAL; + list_for_each_entry(req, &to_queue, list) { + if (test_bit(FR_INTERRUPTED, &req->flags)) + list_del_init(&req->intr_entry); } + /* iq and pq requests are both oldest to newest */ + list_splice(&to_queue, &fiq->pending); + fuse_dev_wake_and_unlock(fiq); } /* Look up request on processing list by unique ID */ @@ -2196,7 +1861,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_copy_state *cs, size_t nbytes) { int err; - struct fuse_conn *fc = fud->fc; + struct fuse_chan *fch = fud->chan; struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_out_header oh; @@ -2218,7 +1883,18 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, * and error contains notification code. 
*/ if (!oh.unique) { - err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs); + /* + * Only allow notifications while the connection is in an + * initialized and connected state + */ + err = -EINVAL; + if (!fch->initialized || !fch->connected) + goto copy_finish; + + /* Don't try to move folios (yet) */ + cs->move_folios = false; + + err = fuse_notify(fch->conn, oh.error, nbytes - sizeof(oh), cs); goto copy_finish; } @@ -2246,7 +1922,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, if (nbytes != sizeof(struct fuse_out_header)) err = -EINVAL; else if (oh.error == -ENOSYS) - fc->no_interrupt = 1; + fch->no_interrupt = 1; else if (oh.error == -EAGAIN) err = queue_interrupt(req); @@ -2406,7 +2082,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) if (IS_ERR(fud)) return EPOLLERR; - fiq = &fud->fc->iq; + fiq = &fud->chan->iq; poll_wait(file, &fiq->waitq, wait); spin_lock(&fiq->lock); @@ -2432,21 +2108,6 @@ void fuse_dev_end_requests(struct list_head *head) } } -static void end_polls(struct fuse_conn *fc) -{ - struct rb_node *p; - - p = rb_first(&fc->polled_files); - - while (p) { - struct fuse_file *ff; - ff = rb_entry(p, struct fuse_file, polled_node); - wake_up_interruptible_all(&ff->poll_wait); - - p = rb_next(p); - } -} - /* * Abort all requests. * @@ -2465,27 +2126,29 @@ static void end_polls(struct fuse_conn *fc) * is OK, the request will in that case be removed from the list before we touch * it. 
*/ -void fuse_abort_conn(struct fuse_conn *fc) +void fuse_chan_abort(struct fuse_chan *fch, bool abort_with_err) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; - spin_lock(&fc->lock); - if (fc->connected) { + fch->abort_with_err = abort_with_err; + + spin_lock(&fch->lock); + if (fch->connected) { struct fuse_dev *fud; struct fuse_req *req, *next; LIST_HEAD(to_end); unsigned int i; - if (fc->timeout.req_timeout) - cancel_delayed_work(&fc->timeout.work); + if (fch->timeout.req_timeout) + cancel_delayed_work(&fch->timeout.work); - /* Background queuing checks fc->connected under bg_lock */ - spin_lock(&fc->bg_lock); - fc->connected = 0; - spin_unlock(&fc->bg_lock); + /* Background queuing checks fch->connected under bg_lock */ + spin_lock(&fch->bg_lock); + fch->connected = 0; + spin_unlock(&fch->bg_lock); - fuse_set_initialized(fc); - list_for_each_entry(fud, &fc->devices, entry) { + fuse_chan_set_initialized(fch, NULL); + list_for_each_entry(fud, &fch->devices, entry) { struct fuse_pqueue *fpq = &fud->pq; spin_lock(&fpq->lock); @@ -2506,11 +2169,11 @@ void fuse_abort_conn(struct fuse_conn *fc) &to_end); spin_unlock(&fpq->lock); } - spin_lock(&fc->bg_lock); - fc->blocked = 0; - fc->max_background = UINT_MAX; - flush_bg_queue(fc); - spin_unlock(&fc->bg_lock); + spin_lock(&fch->bg_lock); + fch->blocked = 0; + fch->max_background = UINT_MAX; + flush_bg_queue(fch); + spin_unlock(&fch->bg_lock); spin_lock(&fiq->lock); fiq->connected = 0; @@ -2522,63 +2185,64 @@ void fuse_abort_conn(struct fuse_conn *fc) wake_up_all(&fiq->waitq); spin_unlock(&fiq->lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); - end_polls(fc); - wake_up_all(&fc->blocked_waitq); - spin_unlock(&fc->lock); + fuse_end_polls(fch->conn); + wake_up_all(&fch->blocked_waitq); + spin_unlock(&fch->lock); fuse_dev_end_requests(&to_end); /* - * fc->lock must not be taken to avoid conflicts with io-uring + * fch->lock must not be taken to avoid conflicts with io-uring * locks */ - 
fuse_uring_abort(fc); + fuse_uring_abort(fch); } else { - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); } } -EXPORT_SYMBOL_GPL(fuse_abort_conn); +EXPORT_SYMBOL_GPL(fuse_chan_abort); -void fuse_wait_aborted(struct fuse_conn *fc) +void fuse_chan_wait_aborted(struct fuse_chan *fch) { /* matches implicit memory barrier in fuse_drop_waiting() */ smp_mb(); - wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); + wait_event(fch->blocked_waitq, fuse_chan_num_waiting(fch) == 0); - fuse_uring_wait_stopped_queues(fc); + fuse_uring_wait_stopped_queues(fch); } int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_dev *fud = fuse_file_to_fud(file); /* Pairs with cmpxchg() in fuse_dev_install() */ - struct fuse_conn *fc = xchg(&fud->fc, FUSE_DEV_FC_DISCONNECTED); + struct fuse_chan *fch = xchg(&fud->chan, FUSE_DEV_CHAN_DISCONNECTED); - if (fc) { + if (fch) { struct fuse_pqueue *fpq = &fud->pq; LIST_HEAD(to_end); unsigned int i; bool last; + /* Make sure fuse_dev_install_with_pq() has finished */ + spin_lock(&fch->lock); spin_lock(&fpq->lock); WARN_ON(!list_empty(&fpq->io)); for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) list_splice_init(&fpq->processing[i], &to_end); spin_unlock(&fpq->lock); - fuse_dev_end_requests(&to_end); - - spin_lock(&fc->lock); list_del(&fud->entry); /* Are we the last open device? 
*/ - last = list_empty(&fc->devices); - spin_unlock(&fc->lock); + last = list_empty(&fch->devices); + spin_unlock(&fch->lock); + + fuse_dev_end_requests(&to_end); if (last) { - WARN_ON(fc->iq.fasync != NULL); - fuse_abort_conn(fc); + WARN_ON(fch->iq.fasync != NULL); + fuse_chan_abort(fch, false); } - fuse_conn_put(fc); + fuse_conn_put(fch->conn); } fuse_dev_put(fud); return 0; @@ -2593,13 +2257,14 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) return PTR_ERR(fud); /* No locking - fasync_helper does its own locking */ - return fasync_helper(fd, file, on, &fud->fc->iq.fasync); + return fasync_helper(fd, file, on, &fud->chan->iq.fasync); } static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) { int oldfd; struct fuse_dev *fud, *new_fud; + struct list_head *pq; if (get_user(oldfd, argp)) return -EFAULT; @@ -2619,12 +2284,14 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) if (IS_ERR(fud)) return PTR_ERR(fud); + pq = fuse_pqueue_alloc(); + if (!pq) + return -ENOMEM; + new_fud = fuse_file_to_fud(file); - if (fuse_dev_fc_get(new_fud)) + if (!fuse_dev_install_with_pq(new_fud, fud->chan, pq)) return -EINVAL; - fuse_dev_install(new_fud, fud->fc); - return 0; } @@ -2643,7 +2310,7 @@ static long fuse_dev_ioctl_backing_open(struct file *file, if (copy_from_user(&map, argp, sizeof(map))) return -EFAULT; - return fuse_backing_open(fud->fc, &map); + return fuse_backing_open(fud->chan->conn, &map); } static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) @@ -2660,21 +2327,18 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) if (get_user(backing_id, argp)) return -EFAULT; - return fuse_backing_close(fud->fc, backing_id); + return fuse_backing_close(fud->chan->conn, backing_id); } static long fuse_dev_ioctl_sync_init(struct file *file) { - int err = -EINVAL; struct fuse_dev *fud = fuse_file_to_fud(file); - mutex_lock(&fuse_mutex); - if (!fuse_dev_fc_get(fud)) { - 
fud->sync_init = true; - err = 0; - } - mutex_unlock(&fuse_mutex); - return err; + if (fuse_dev_chan_get(fud)) + return -EINVAL; + + fud->sync_init = true; + return 0; } static long fuse_dev_ioctl(struct file *file, unsigned int cmd, @@ -2707,7 +2371,7 @@ static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file) if (!fud) return; - seq_printf(seq, "fuse_connection:\t%u\n", fud->fc->dev); + seq_printf(seq, "fuse_connection:\t%u\n", fuse_conn_get_id(fud->chan->conn)); } #endif diff --git a/fs/fuse/dev.h b/fs/fuse/dev.h new file mode 100644 index 000000000000..aed69fd14c41 --- /dev/null +++ b/fs/fuse/dev.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FS_FUSE_DEV_H +#define _FS_FUSE_DEV_H + +#include <linux/cleanup.h> + +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +struct fuse_conn; +struct fuse_chan; +struct fuse_dev; +struct fuse_args; +struct fuse_copy_state; +struct fuse_backing_map; +struct file; +struct folio; +enum fuse_notify_code; + +struct fuse_chan_param { + unsigned int minor; + unsigned int max_write; + unsigned int max_pages; +}; + +struct fuse_chan *fuse_chan_new(void); +struct fuse_chan *fuse_dev_chan_new(void); +void fuse_chan_release(struct fuse_chan *fch); +void fuse_chan_free(struct fuse_chan *fch); +unsigned int fuse_chan_num_background(struct fuse_chan *fch); +unsigned int fuse_chan_max_background(struct fuse_chan *fch); +void fuse_chan_max_background_set(struct fuse_chan *fch, unsigned int val); +unsigned int fuse_chan_num_waiting(struct fuse_chan *fch); +void fuse_chan_set_fc(struct fuse_chan *fch, struct fuse_conn *fc); +void fuse_chan_set_initialized(struct fuse_chan *fch, struct fuse_chan_param *param); +void fuse_chan_io_uring_enable(struct fuse_chan *fch); +ssize_t fuse_chan_send(struct fuse_chan *fch, struct fuse_args *args); +int fuse_chan_send_bg(struct fuse_chan *fch, struct fuse_args *args, gfp_t gfp_flags); +int 
fuse_chan_send_notify_reply(struct fuse_chan *fch, struct fuse_args *args, u64 unique); +void fuse_chan_resend(struct fuse_chan *fch); + +struct fuse_forget_link *fuse_alloc_forget(void); +void fuse_chan_queue_forget(struct fuse_chan *fch, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup); + +DEFINE_FREE(fuse_chan_free, struct fuse_chan *, if (_T) fuse_chan_free(_T)) + +/** + * Initialize the client device + */ +int fuse_dev_init(void); + +/** + * Cleanup the client device + */ +void fuse_dev_cleanup(void); + +void fuse_dev_install(struct fuse_dev *fud, struct fuse_chan *fch); +bool fuse_dev_verify(struct fuse_dev *fud, struct fuse_chan *fch); +void fuse_dev_put(struct fuse_dev *fud); +bool fuse_dev_is_installed(struct fuse_dev *fud); +bool fuse_dev_is_sync_init(struct fuse_dev *fud); +struct fuse_dev *fuse_dev_grab(struct file *file); + +void fuse_init_server_timeout(struct fuse_chan *fch, unsigned int timeout); + +/* Abort all requests */ +void fuse_chan_abort(struct fuse_chan *fch, bool abort_with_err); +void fuse_chan_wait_aborted(struct fuse_chan *fch); + +/** + * Acquire reference to fuse_conn + */ +struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); + +/** + * Release reference to fuse_conn + */ +void fuse_conn_put(struct fuse_conn *fc); + +dev_t fuse_conn_get_id(struct fuse_conn *fc); + +void fuse_end_polls(struct fuse_conn *fc); +int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, + unsigned int size, struct fuse_copy_state *cs); + +int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); +int fuse_backing_close(struct fuse_conn *fc, int backing_id); + +int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size); +int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop, + unsigned offset, unsigned count, int zeroing); +void fuse_copy_finish(struct fuse_copy_state *cs); + +#ifdef CONFIG_FUSE_IO_URING +bool fuse_uring_enabled(void); +void fuse_uring_destruct(struct fuse_chan *fch); +#else 
/* CONFIG_FUSE_IO_URING */ +static inline bool fuse_uring_enabled(void) +{ + return false; +} + +static inline void fuse_uring_destruct(struct fuse_chan *fch) +{ +} +#endif /* CONFIG_FUSE_IO_URING */ + +#endif /* _FS_FUSE_DEV_H */ diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 7b9822e8837b..77c8cec43d9c 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -4,9 +4,9 @@ * Copyright (c) 2023-2024 DataDirect Networks. */ -#include "fuse_i.h" +#include "dev.h" +#include "args.h" #include "dev_uring_i.h" -#include "fuse_dev_i.h" #include "fuse_trace.h" #include <linux/fs.h> @@ -18,7 +18,8 @@ MODULE_PARM_DESC(enable_uring, "Enable userspace communication through io-uring"); #define FUSE_URING_IOV_SEGS 2 /* header and payload */ - +#define FUSE_URING_IOV_HEADERS 0 +#define FUSE_URING_IOV_PAYLOAD 1 bool fuse_uring_enabled(void) { @@ -31,6 +32,15 @@ struct fuse_uring_pdu { static const struct fuse_iqueue_ops fuse_io_uring_ops; +enum fuse_uring_header_type { + /* struct fuse_in_header / struct fuse_out_header */ + FUSE_URING_HEADER_IN_OUT, + /* per op code header */ + FUSE_URING_HEADER_OP, + /* struct fuse_uring_ent_in_out header */ + FUSE_URING_HEADER_RING_ENT, +}; + static void uring_cmd_set_ring_ent(struct io_uring_cmd *cmd, struct fuse_ring_ent *ring_ent) { @@ -51,10 +61,10 @@ static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd) static void fuse_uring_flush_bg(struct fuse_ring_queue *queue) { struct fuse_ring *ring = queue->ring; - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; lockdep_assert_held(&queue->lock); - lockdep_assert_held(&fc->bg_lock); + lockdep_assert_held(&fch->bg_lock); /* * Allow one bg request per queue, ignoring global fc limits. @@ -62,14 +72,14 @@ static void fuse_uring_flush_bg(struct fuse_ring_queue *queue) * eliminates the need for remote queue wake-ups when global * limits are met but this queue has no more waiting requests. 
*/ - while ((fc->active_background < fc->max_background || + while ((fch->active_background < fch->max_background || !queue->active_background) && (!list_empty(&queue->fuse_req_bg_queue))) { struct fuse_req *req; req = list_first_entry(&queue->fuse_req_bg_queue, struct fuse_req, list); - fc->active_background++; + fch->active_background++; queue->active_background++; list_move_tail(&req->list, &queue->fuse_req_queue); @@ -81,7 +91,7 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req, { struct fuse_ring_queue *queue = ent->queue; struct fuse_ring *ring = queue->ring; - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; lockdep_assert_not_held(&queue->lock); spin_lock(&queue->lock); @@ -89,9 +99,10 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req, list_del_init(&req->list); if (test_bit(FR_BACKGROUND, &req->flags)) { queue->active_background--; - spin_lock(&fc->bg_lock); + spin_lock(&fch->bg_lock); + fuse_request_bg_finish(fch, req); fuse_uring_flush_bg(queue); - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); } spin_unlock(&queue->lock); @@ -123,26 +134,25 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring) { int qid; struct fuse_ring_queue *queue; - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; for (qid = 0; qid < ring->nr_queues; qid++) { queue = READ_ONCE(ring->queues[qid]); if (!queue) continue; - queue->stopped = true; - - WARN_ON_ONCE(ring->fc->max_background != UINT_MAX); + WARN_ON_ONCE(fch->max_background != UINT_MAX); spin_lock(&queue->lock); - spin_lock(&fc->bg_lock); + queue->stopped = true; + spin_lock(&fch->bg_lock); fuse_uring_flush_bg(queue); - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); spin_unlock(&queue->lock); fuse_uring_abort_end_queue_requests(queue); } } -static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *list) +static bool ent_list_request_expired(struct fuse_chan *fch, struct 
list_head *list) { struct fuse_ring_ent *ent; struct fuse_req *req; @@ -154,12 +164,12 @@ static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *lis req = ent->fuse_req; return time_is_before_jiffies(req->create_time + - fc->timeout.req_timeout); + fch->timeout.req_timeout); } -bool fuse_uring_request_expired(struct fuse_conn *fc) +bool fuse_uring_request_expired(struct fuse_chan *fch) { - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; struct fuse_ring_queue *queue; int qid; @@ -172,10 +182,10 @@ bool fuse_uring_request_expired(struct fuse_conn *fc) continue; spin_lock(&queue->lock); - if (fuse_request_expired(fc, &queue->fuse_req_queue) || - fuse_request_expired(fc, &queue->fuse_req_bg_queue) || - ent_list_request_expired(fc, &queue->ent_w_req_queue) || - ent_list_request_expired(fc, &queue->ent_in_userspace)) { + if (fuse_request_expired(fch, &queue->fuse_req_queue) || + fuse_request_expired(fch, &queue->fuse_req_bg_queue) || + ent_list_request_expired(fch, &queue->ent_w_req_queue) || + ent_list_request_expired(fch, &queue->ent_in_userspace)) { spin_unlock(&queue->lock); return true; } @@ -185,9 +195,9 @@ bool fuse_uring_request_expired(struct fuse_conn *fc) return false; } -void fuse_uring_destruct(struct fuse_conn *fc) +void fuse_uring_destruct(struct fuse_chan *fch) { - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; int qid; if (!ring) @@ -218,20 +228,20 @@ void fuse_uring_destruct(struct fuse_conn *fc) kfree(ring->queues); kfree(ring); - fc->ring = NULL; + fch->ring = NULL; } /* * Basic ring setup for this connection based on the provided configuration */ -static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) +static struct fuse_ring *fuse_uring_create(struct fuse_chan *fch) { struct fuse_ring *ring; size_t nr_queues = num_possible_cpus(); struct fuse_ring *res = NULL; size_t max_payload_size; - ring = kzalloc_obj(*fc->ring, GFP_KERNEL_ACCOUNT); + ring = kzalloc_obj(*ring, 
GFP_KERNEL_ACCOUNT); if (!ring) return NULL; @@ -240,25 +250,29 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) if (!ring->queues) goto out_err; - max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write); - max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE); + max_payload_size = max(FUSE_MIN_READ_BUFFER, fch->max_write); + max_payload_size = max(max_payload_size, fch->max_pages * PAGE_SIZE); - spin_lock(&fc->lock); - if (fc->ring) { + spin_lock(&fch->lock); + if (!fch->connected) { + spin_unlock(&fch->lock); + goto out_err; + } + if (fch->ring) { /* race, another thread created the ring in the meantime */ - spin_unlock(&fc->lock); - res = fc->ring; + spin_unlock(&fch->lock); + res = fch->ring; goto out_err; } init_waitqueue_head(&ring->stop_waitq); ring->nr_queues = nr_queues; - ring->fc = fc; + ring->chan = fch; ring->max_payload_sz = max_payload_size; - smp_store_release(&fc->ring, ring); + smp_store_release(&fch->ring, ring); - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); return ring; out_err: @@ -270,14 +284,14 @@ out_err: static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, int qid) { - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; struct fuse_ring_queue *queue; struct list_head *pq; queue = kzalloc_obj(*queue, GFP_KERNEL_ACCOUNT); if (!queue) return NULL; - pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE); + pq = fuse_pqueue_alloc(); if (!pq) { kfree(queue); return NULL; @@ -295,12 +309,12 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, INIT_LIST_HEAD(&queue->fuse_req_bg_queue); INIT_LIST_HEAD(&queue->ent_released); - queue->fpq.processing = pq; fuse_pqueue_init(&queue->fpq); + queue->fpq.processing = pq; - spin_lock(&fc->lock); + spin_lock(&fch->lock); if (ring->queues[qid]) { - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); kfree(queue->fpq.processing); kfree(queue); return ring->queues[qid]; @@ -310,7 +324,7 @@ static 
struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, * write_once and lock as the caller mostly doesn't take the lock at all */ WRITE_ONCE(ring->queues[qid], queue); - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); return queue; } @@ -466,6 +480,7 @@ static void fuse_uring_async_stop_queues(struct work_struct *work) FUSE_URING_TEARDOWN_INTERVAL); } else { wake_up_all(&ring->stop_waitq); + fuse_conn_put(ring->chan->conn); } } @@ -477,6 +492,7 @@ void fuse_uring_stop_queues(struct fuse_ring *ring) fuse_uring_teardown_all_queues(ring); if (atomic_read(&ring->queue_refs) > 0) { + fuse_conn_get(ring->chan->conn); ring->teardown_time = jiffies; INIT_DELAYED_WORK(&ring->async_teardown_work, fuse_uring_async_stop_queues); @@ -507,8 +523,7 @@ static void fuse_uring_cancel(struct io_uring_cmd *cmd, queue = ent->queue; spin_lock(&queue->lock); if (ent->state == FRRS_AVAILABLE) { - ent->state = FRRS_USERSPACE; - list_move_tail(&ent->list, &queue->ent_in_userspace); + list_del_init(&ent->list); need_cmd_done = true; ent->cmd = NULL; } @@ -517,6 +532,9 @@ static void fuse_uring_cancel(struct io_uring_cmd *cmd, if (need_cmd_done) { /* no queue lock to avoid lock order issues */ io_uring_cmd_done(cmd, -ENOTCONN, issue_flags); + kfree(ent); + if (atomic_dec_and_test(&queue->ring->queue_refs)) + wake_up_all(&queue->ring->stop_waitq); } } @@ -531,8 +549,7 @@ static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags, * Checks for errors and stores it into the request */ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh, - struct fuse_req *req, - struct fuse_conn *fc) + struct fuse_req *req) { int err; @@ -571,6 +588,82 @@ err: return err; } +static int ring_header_type_offset(enum fuse_uring_header_type type) +{ + switch (type) { + case FUSE_URING_HEADER_IN_OUT: + return 0; + case FUSE_URING_HEADER_OP: + return offsetof(struct fuse_uring_req_header, op_in); + case FUSE_URING_HEADER_RING_ENT: + return offsetof(struct 
fuse_uring_req_header, ring_ent_in_out); + default: + WARN_ONCE(1, "Invalid header type: %d\n", type); + return -EINVAL; + } +} + +static int copy_header_to_ring(struct fuse_ring_ent *ent, + enum fuse_uring_header_type type, + const void *header, size_t header_size) +{ + int offset = ring_header_type_offset(type); + void __user *ring; + + if (offset < 0) + return offset; + + ring = (void __user *)ent->headers + offset; + + if (copy_to_user(ring, header, header_size)) { + pr_info_ratelimited("Copying header to ring failed.\n"); + return -EFAULT; + } + + return 0; +} + +static int copy_header_from_ring(struct fuse_ring_ent *ent, + enum fuse_uring_header_type type, void *header, + size_t header_size) +{ + int offset = ring_header_type_offset(type); + const void __user *ring; + + if (offset < 0) + return offset; + + ring = (void __user *)ent->headers + offset; + + if (copy_from_user(header, ring, header_size)) { + pr_info_ratelimited("Copying header from ring failed.\n"); + return -EFAULT; + } + + return 0; +} + +static int setup_fuse_copy_state(struct fuse_copy_state *cs, + struct fuse_ring *ring, struct fuse_req *req, + struct fuse_ring_ent *ent, int dir, + struct iov_iter *iter) +{ + int err; + + err = import_ubuf(dir, ent->payload, ring->max_payload_sz, iter); + if (err) { + pr_info_ratelimited("fuse: Import of user buffer failed\n"); + return err; + } + + fuse_copy_init(cs, dir == ITER_DEST, iter); + + cs->is_uring = true; + cs->req = req; + + return 0; +} + static int fuse_uring_copy_from_ring(struct fuse_ring *ring, struct fuse_req *req, struct fuse_ring_ent *ent) @@ -581,20 +674,15 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring, int err; struct fuse_uring_ent_in_out ring_in_out; - err = copy_from_user(&ring_in_out, &ent->headers->ring_ent_in_out, - sizeof(ring_in_out)); + err = copy_header_from_ring(ent, FUSE_URING_HEADER_RING_ENT, + &ring_in_out, sizeof(ring_in_out)); if (err) - return -EFAULT; + return err; - err = import_ubuf(ITER_SOURCE, 
ent->payload, ring->max_payload_sz, - &iter); + err = setup_fuse_copy_state(&cs, ring, req, ent, ITER_SOURCE, &iter); if (err) return err; - fuse_copy_init(&cs, false, &iter); - cs.is_uring = true; - cs.req = req; - err = fuse_copy_out_args(&cs, args, ring_in_out.payload_sz); fuse_copy_finish(&cs); return err; @@ -617,15 +705,9 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, .commit_id = req->in.h.unique, }; - err = import_ubuf(ITER_DEST, ent->payload, ring->max_payload_sz, &iter); - if (err) { - pr_info_ratelimited("fuse: Import of user buffer failed\n"); + err = setup_fuse_copy_state(&cs, ring, req, ent, ITER_DEST, &iter); + if (err) return err; - } - - fuse_copy_init(&cs, true, &iter); - cs.is_uring = true; - cs.req = req; if (num_args > 0) { /* @@ -633,13 +715,11 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, * Some op code have that as zero size. */ if (args->in_args[0].size > 0) { - err = copy_to_user(&ent->headers->op_in, in_args->value, - in_args->size); - if (err) { - pr_info_ratelimited( - "Copying the header failed.\n"); - return -EFAULT; - } + err = copy_header_to_ring(ent, FUSE_URING_HEADER_OP, + in_args->value, + in_args->size); + if (err) + return err; } in_args++; num_args--; @@ -655,9 +735,8 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req, } ent_in_out.payload_sz = cs.ring.copied_sz; - err = copy_to_user(&ent->headers->ring_ent_in_out, &ent_in_out, - sizeof(ent_in_out)); - return err ? 
-EFAULT : 0; + return copy_header_to_ring(ent, FUSE_URING_HEADER_RING_ENT, + &ent_in_out, sizeof(ent_in_out)); } static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent, @@ -686,14 +765,8 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent, } /* copy fuse_in_header */ - err = copy_to_user(&ent->headers->in_out, &req->in.h, - sizeof(req->in.h)); - if (err) { - err = -EFAULT; - return err; - } - - return 0; + return copy_header_to_ring(ent, FUSE_URING_HEADER_IN_OUT, &req->in.h, + sizeof(req->in.h)); } static int fuse_uring_prepare_send(struct fuse_ring_ent *ent, @@ -702,40 +775,36 @@ static int fuse_uring_prepare_send(struct fuse_ring_ent *ent, int err; err = fuse_uring_copy_to_ring(ent, req); - if (!err) + if (!err) { set_bit(FR_SENT, &req->flags); - else + trace_fuse_request_sent(req); + } else { + /* + * Copying the request failed. Remove the entry from the + * ent_w_req_queue list and terminate the request + */ + spin_lock(&ent->queue->lock); + list_del_init(&ent->list); + ent->state = FRRS_INVALID; + spin_unlock(&ent->queue->lock); + fuse_uring_req_end(ent, req, err); + } return err; } -/* - * Write data to the ring buffer and send the request to userspace, - * userspace will read it - * This is comparable with classical read(/dev/fuse) - */ -static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent, - struct fuse_req *req, - unsigned int issue_flags) +/* Used to find the request on SQE commit */ +static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent) { struct fuse_ring_queue *queue = ent->queue; - int err; - struct io_uring_cmd *cmd; - - err = fuse_uring_prepare_send(ent, req); - if (err) - return err; - - spin_lock(&queue->lock); - cmd = ent->cmd; - ent->cmd = NULL; - ent->state = FRRS_USERSPACE; - list_move_tail(&ent->list, &queue->ent_in_userspace); - spin_unlock(&queue->lock); + struct fuse_pqueue *fpq = &queue->fpq; + unsigned int hash; + struct fuse_req *req = ent->fuse_req; - io_uring_cmd_done(cmd, 0, issue_flags); - return 
0; + req->ring_entry = ent; + hash = fuse_req_hash(req->in.h.unique); + list_move_tail(&req->list, &fpq->processing[hash]); } /* @@ -749,19 +818,6 @@ static void fuse_uring_ent_avail(struct fuse_ring_ent *ent, ent->state = FRRS_AVAILABLE; } -/* Used to find the request on SQE commit */ -static void fuse_uring_add_to_pq(struct fuse_ring_ent *ent, - struct fuse_req *req) -{ - struct fuse_ring_queue *queue = ent->queue; - struct fuse_pqueue *fpq = &queue->fpq; - unsigned int hash; - - req->ring_entry = ent; - hash = fuse_req_hash(req->in.h.unique); - list_move_tail(&req->list, &fpq->processing[hash]); -} - /* * Assign a fuse queue entry to the given entry */ @@ -779,10 +835,13 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent, } clear_bit(FR_PENDING, &req->flags); + + /* Until fuse_uring_add_to_pq() the req is not attached to any list */ + list_del_init(&req->list); + ent->fuse_req = req; ent->state = FRRS_FUSE_REQ; list_move_tail(&ent->list, &queue->ent_w_req_queue); - fuse_uring_add_to_pq(ent, req); } /* Fetch the next fuse request if available */ @@ -812,17 +871,13 @@ static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req, unsigned int issue_flags) { struct fuse_ring *ring = ent->queue->ring; - struct fuse_conn *fc = ring->fc; - ssize_t err = 0; + ssize_t err = -EFAULT; - err = copy_from_user(&req->out.h, &ent->headers->in_out, - sizeof(req->out.h)); - if (err) { - req->out.h.error = -EFAULT; + if (copy_header_from_ring(ent, FUSE_URING_HEADER_IN_OUT, &req->out.h, + sizeof(req->out.h))) goto out; - } - err = fuse_uring_out_header_has_err(&req->out.h, req, fc); + err = fuse_uring_out_header_has_err(&req->out.h, req); if (err) { /* req->out.h.error already set */ goto out; @@ -834,11 +889,13 @@ out: } /* - * Get the next fuse req and send it + * Get the next fuse req. + * + * Returns true if the next fuse request has been assigned to the ent. + * Else, there is no next fuse request and this returns false. 
*/ -static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ent, - struct fuse_ring_queue *queue, - unsigned int issue_flags) +static bool fuse_uring_get_next_fuse_req(struct fuse_ring_ent *ent, + struct fuse_ring_queue *queue) { int err; struct fuse_req *req; @@ -850,10 +907,12 @@ retry: spin_unlock(&queue->lock); if (req) { - err = fuse_uring_send_next_to_ring(ent, req, issue_flags); + err = fuse_uring_prepare_send(ent, req); if (err) goto retry; } + + return req != NULL; } static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent) @@ -871,15 +930,30 @@ static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent) return 0; } +static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd, + ssize_t ret, unsigned int issue_flags) +{ + struct fuse_ring_queue *queue = ent->queue; + + spin_lock(&queue->lock); + ent->state = FRRS_USERSPACE; + list_move_tail(&ent->list, &queue->ent_in_userspace); + ent->cmd = NULL; + fuse_uring_add_to_pq(ent); + spin_unlock(&queue->lock); + + io_uring_cmd_done(cmd, ret, issue_flags); +} + /* FUSE_URING_CMD_COMMIT_AND_FETCH handler */ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, - struct fuse_conn *fc) + struct fuse_chan *fch) { const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe128_cmd(cmd->sqe, struct fuse_uring_cmd_req); struct fuse_ring_ent *ent; int err; - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; struct fuse_ring_queue *queue; uint64_t commit_id = READ_ONCE(cmd_req->commit_id); unsigned int qid = READ_ONCE(cmd_req->qid); @@ -898,10 +972,15 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, return err; fpq = &queue->fpq; - if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped)) + if (!READ_ONCE(fch->connected)) return err; spin_lock(&queue->lock); + if (unlikely(queue->stopped)) { + spin_unlock(&queue->lock); + return err; + } + /* Find a request based on the unique ID of the fuse request * This should get 
revised, as it needs a hash calculation and list * search. And full struct fuse_pqueue is needed (memory overhead). @@ -924,9 +1003,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, pr_info_ratelimited("qid=%d commit_id %llu state %d", queue->qid, commit_id, ent->state); spin_unlock(&queue->lock); - req->out.h.error = err; - clear_bit(FR_SENT, &req->flags); - fuse_request_end(req); + fuse_uring_req_end(ent, req, err); return err; } @@ -943,7 +1020,8 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, * and fetching is done in one step vs legacy fuse, which has separated * read (fetch request) and write (commit result). */ - fuse_uring_next_fuse_req(ent, queue, issue_flags); + if (fuse_uring_get_next_fuse_req(ent, queue)) + fuse_uring_send(ent, cmd, 0, issue_flags); return 0; } @@ -975,14 +1053,25 @@ static bool is_ring_ready(struct fuse_ring *ring, int current_qid) /* * fuse_uring_req_fetch command handling */ -static void fuse_uring_do_register(struct fuse_ring_ent *ent, - struct io_uring_cmd *cmd, - unsigned int issue_flags) +static int fuse_uring_do_register(struct fuse_ring_ent *ent, + struct io_uring_cmd *cmd, + unsigned int issue_flags) { struct fuse_ring_queue *queue = ent->queue; struct fuse_ring *ring = queue->ring; - struct fuse_conn *fc = ring->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = ring->chan; + struct fuse_iqueue *fiq = &fch->iq; + + spin_lock(&fch->lock); + /* abort teardown path is running or has run */ + if (!fch->connected) { + spin_unlock(&fch->lock); + if (atomic_dec_and_test(&ring->queue_refs)) + wake_up_all(&ring->stop_waitq); + kfree(ent); + return -ECONNABORTED; + } + spin_unlock(&fch->lock); fuse_uring_prepare_cancel(cmd, issue_flags, ent); @@ -991,20 +1080,21 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent, fuse_uring_ent_avail(ent, queue); spin_unlock(&queue->lock); - if (!ring->ready) { + if (!READ_ONCE(ring->ready)) { bool ready = 
is_ring_ready(ring, queue->qid); if (ready) { WRITE_ONCE(fiq->ops, &fuse_io_uring_ops); - WRITE_ONCE(ring->ready, true); - wake_up_all(&fc->blocked_waitq); + smp_store_release(&ring->ready, true); + wake_up_all(&fch->blocked_waitq); } } + return 0; } /* - * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1] - * the payload + * sqe->addr is a ptr to an iovec array, iov[FUSE_URING_IOV_HEADERS] has the + * headers, iov[FUSE_URING_IOV_PAYLOAD] the payload */ static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe, struct iovec iov[FUSE_URING_IOV_SEGS]) @@ -1034,8 +1124,8 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, { struct fuse_ring *ring = queue->ring; struct fuse_ring_ent *ent; - size_t payload_size; struct iovec iov[FUSE_URING_IOV_SEGS]; + struct iovec *headers, *payload; int err; err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov); @@ -1046,15 +1136,16 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, } err = -EINVAL; - if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) { - pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len); + headers = &iov[FUSE_URING_IOV_HEADERS]; + if (headers->iov_len < sizeof(struct fuse_uring_req_header)) { + pr_info_ratelimited("Invalid header len %zu\n", headers->iov_len); return ERR_PTR(err); } - payload_size = iov[1].iov_len; - if (payload_size < ring->max_payload_sz) { + payload = &iov[FUSE_URING_IOV_PAYLOAD]; + if (payload->iov_len < ring->max_payload_sz) { pr_info_ratelimited("Invalid req payload len %zu\n", - payload_size); + payload->iov_len); return ERR_PTR(err); } @@ -1066,8 +1157,8 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, INIT_LIST_HEAD(&ent->list); ent->queue = queue; - ent->headers = iov[0].iov_base; - ent->payload = iov[1].iov_base; + ent->headers = headers->iov_base; + ent->payload = payload->iov_base; atomic_inc(&ring->queue_refs); return ent; @@ -1078,11 +1169,11 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, * entry as "ready to 
get fuse requests" on the queue */ static int fuse_uring_register(struct io_uring_cmd *cmd, - unsigned int issue_flags, struct fuse_conn *fc) + unsigned int issue_flags, struct fuse_chan *fch) { const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe128_cmd(cmd->sqe, struct fuse_uring_cmd_req); - struct fuse_ring *ring = smp_load_acquire(&fc->ring); + struct fuse_ring *ring = smp_load_acquire(&fch->ring); struct fuse_ring_queue *queue; struct fuse_ring_ent *ent; int err; @@ -1090,7 +1181,7 @@ static int fuse_uring_register(struct io_uring_cmd *cmd, err = -ENOMEM; if (!ring) { - ring = fuse_uring_create(fc); + ring = fuse_uring_create(fch); if (!ring) return err; } @@ -1116,9 +1207,7 @@ static int fuse_uring_register(struct io_uring_cmd *cmd, if (IS_ERR(ent)) return PTR_ERR(ent); - fuse_uring_do_register(ent, cmd, issue_flags); - - return 0; + return fuse_uring_do_register(ent, cmd, issue_flags); } /* @@ -1128,7 +1217,7 @@ static int fuse_uring_register(struct io_uring_cmd *cmd, int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct fuse_dev *fud; - struct fuse_conn *fc; + struct fuse_chan *fch; u32 cmd_op = cmd->cmd_op; int err; @@ -1146,39 +1235,39 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) pr_info_ratelimited("No fuse device found\n"); return PTR_ERR(fud); } - fc = fud->fc; + fch = fud->chan; /* Once a connection has io-uring enabled on it, it can't be disabled */ - if (!enable_uring && !fc->io_uring) { + if (!enable_uring && !fch->io_uring) { pr_info_ratelimited("fuse-io-uring is disabled\n"); return -EOPNOTSUPP; } - if (fc->aborted) + if (fch->abort_with_err) return -ECONNABORTED; - if (!fc->connected) + if (!fch->connected) return -ENOTCONN; /* * fuse_uring_register() needs the ring to be initialized, * we need to know the max payload size */ - if (!fc->initialized) + if (!fch->initialized) return -EAGAIN; switch (cmd_op) { case FUSE_IO_URING_CMD_REGISTER: - err = fuse_uring_register(cmd, issue_flags, 
fc); + err = fuse_uring_register(cmd, issue_flags, fch); if (err) { pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n", err); - fc->io_uring = 0; - wake_up_all(&fc->blocked_waitq); + fch->io_uring = 0; + wake_up_all(&fch->blocked_waitq); return err; } break; case FUSE_IO_URING_CMD_COMMIT_AND_FETCH: - err = fuse_uring_commit_fetch(cmd, issue_flags, fc); + err = fuse_uring_commit_fetch(cmd, issue_flags, fch); if (err) { pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n", err); @@ -1192,20 +1281,6 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) return -EIOCBQUEUED; } -static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd, - ssize_t ret, unsigned int issue_flags) -{ - struct fuse_ring_queue *queue = ent->queue; - - spin_lock(&queue->lock); - ent->state = FRRS_USERSPACE; - list_move_tail(&ent->list, &queue->ent_in_userspace); - ent->cmd = NULL; - spin_unlock(&queue->lock); - - io_uring_cmd_done(cmd, ret, issue_flags); -} - /* * This prepares and sends the ring request in fuse-uring task context. 
* User buffers are not mapped yet - the application does not have permission @@ -1222,14 +1297,25 @@ static void fuse_uring_send_in_task(struct io_tw_req tw_req, io_tw_token_t tw) if (!tw.cancel) { err = fuse_uring_prepare_send(ent, ent->fuse_req); if (err) { - fuse_uring_next_fuse_req(ent, queue, issue_flags); - return; + if (!fuse_uring_get_next_fuse_req(ent, queue)) + return; + err = 0; } + fuse_uring_send(ent, cmd, err, issue_flags); } else { err = -ECANCELED; - } - fuse_uring_send(ent, cmd, err, issue_flags); + spin_lock(&queue->lock); + list_del_init(&ent->list); + spin_unlock(&queue->lock); + + io_uring_cmd_done(cmd, err, issue_flags); + + fuse_uring_req_end(ent, ent->fuse_req, err); + kfree(ent); + if (atomic_dec_and_test(&queue->ring->queue_refs)) + wake_up_all(&queue->ring->stop_waitq); + } } static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring) @@ -1261,8 +1347,7 @@ static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent) /* queue a fuse request and send it if a ring entry is available */ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = req->chan->ring; struct fuse_ring_queue *queue; struct fuse_ring_ent *ent = NULL; int err; @@ -1304,8 +1389,8 @@ err: bool fuse_uring_queue_bq_req(struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; - struct fuse_ring *ring = fc->ring; + struct fuse_chan *fch = req->chan; + struct fuse_ring *ring = fch->ring; struct fuse_ring_queue *queue; struct fuse_ring_ent *ent = NULL; @@ -1325,12 +1410,12 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req) ent = list_first_entry_or_null(&queue->ent_avail_queue, struct fuse_ring_ent, list); - spin_lock(&fc->bg_lock); - fc->num_background++; - if (fc->num_background == fc->max_background) - fc->blocked = 1; + spin_lock(&fch->bg_lock); + fch->num_background++; + if (fch->num_background == fch->max_background) + 
fch->blocked = 1; fuse_uring_flush_bg(queue); - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); /* * Due to bg_queue flush limits there might be other bg requests diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h index 51a563922ce1..55f8d04e4b0b 100644 --- a/fs/fuse/dev_uring_i.h +++ b/fs/fuse/dev_uring_i.h @@ -7,7 +7,7 @@ #ifndef _FS_FUSE_DEV_URING_I_H #define _FS_FUSE_DEV_URING_I_H -#include "fuse_i.h" +#include "fuse_dev_i.h" #ifdef CONFIG_FUSE_IO_URING @@ -101,13 +101,13 @@ struct fuse_ring_queue { bool stopped; }; -/** +/* * Describes if uring is for communication and holds alls the data needed * for uring communication */ struct fuse_ring { /* back pointer */ - struct fuse_conn *fc; + struct fuse_chan *chan; /* number of ring queues */ size_t nr_queues; @@ -135,63 +135,54 @@ struct fuse_ring { bool ready; }; -bool fuse_uring_enabled(void); -void fuse_uring_destruct(struct fuse_conn *fc); void fuse_uring_stop_queues(struct fuse_ring *ring); void fuse_uring_abort_end_requests(struct fuse_ring *ring); int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_uring_queue_bq_req(struct fuse_req *req); bool fuse_uring_remove_pending_req(struct fuse_req *req); -bool fuse_uring_request_expired(struct fuse_conn *fc); +bool fuse_uring_request_expired(struct fuse_chan *fch); -static inline void fuse_uring_abort(struct fuse_conn *fc) +static inline void fuse_uring_abort(struct fuse_chan *fch) { - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; if (ring == NULL) return; - if (atomic_read(&ring->queue_refs) > 0) { - fuse_uring_abort_end_requests(ring); + fuse_uring_abort_end_requests(ring); + + if (atomic_read(&ring->queue_refs) > 0) fuse_uring_stop_queues(ring); - } } -static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc) +static inline void fuse_uring_wait_stopped_queues(struct fuse_chan *fch) { - struct 
fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; if (ring) wait_event(ring->stop_waitq, atomic_read(&ring->queue_refs) == 0); } -static inline bool fuse_uring_ready(struct fuse_conn *fc) +static inline bool fuse_uring_ready(struct fuse_chan *fch) { - return fc->ring && fc->ring->ready; -} + struct fuse_ring *ring = READ_ONCE(fch->ring); -#else /* CONFIG_FUSE_IO_URING */ - -static inline void fuse_uring_destruct(struct fuse_conn *fc) -{ + return ring && smp_load_acquire(&ring->ready); } -static inline bool fuse_uring_enabled(void) -{ - return false; -} +#else /* CONFIG_FUSE_IO_URING */ -static inline void fuse_uring_abort(struct fuse_conn *fc) +static inline void fuse_uring_abort(struct fuse_chan *fch) { } -static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc) +static inline void fuse_uring_wait_stopped_queues(struct fuse_chan *fch) { } -static inline bool fuse_uring_ready(struct fuse_conn *fc) +static inline bool fuse_uring_ready(struct fuse_chan *fch) { return false; } @@ -201,7 +192,7 @@ static inline bool fuse_uring_remove_pending_req(struct fuse_req *req) return false; } -static inline bool fuse_uring_request_expired(struct fuse_conn *fc) +static inline bool fuse_uring_request_expired(struct fuse_chan *fch) { return false; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index d8e8ea7280bc..0e2a1039fa43 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> - - This program can be distributed under the terms of the GNU GPL. - See the file COPYING. 
*/ +#include "dev.h" #include "fuse_i.h" #include <linux/pagemap.h> @@ -317,7 +316,7 @@ void fuse_invalidate_attr(struct inode *inode) static void fuse_dir_changed(struct inode *dir) { - fuse_invalidate_attr(dir); + fuse_invalidate_attr_mask(dir, FUSE_STATX_MODDIR); inode_maybe_inc_iversion(dir, false); } @@ -430,7 +429,7 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode) || (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { - fuse_queue_forget(fm->fc, forget, + fuse_chan_queue_forget(fm->fc->chan, forget, outarg.nodeid, 1); goto invalid; } @@ -593,7 +592,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name attr_version, evict_ctr); err = -ENOMEM; if (!*inode) { - fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); + fuse_chan_queue_forget(fm->fc->chan, forget, outarg->nodeid, 1); goto out; } err = 0; @@ -837,7 +836,6 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, if (!forget) goto out_err; - err = -ENOMEM; ff = fuse_file_alloc(fm, true); if (!ff) goto out_put_forget_req; @@ -894,7 +892,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(NULL, ff, flags); - fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + fuse_chan_queue_forget(fm->fc->chan, forget, outentry.nodeid, 1); err = -ENOMEM; goto out_err; } @@ -1019,7 +1017,7 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0); if (!inode) { - fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); + fuse_chan_queue_forget(fm->fc->chan, forget, outarg.nodeid, 1); return ERR_PTR(-ENOMEM); } kfree(forget); @@ -1587,40 +1585,34 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, { 
int err = -ENOTDIR; struct inode *parent; - struct dentry *dir = NULL; - struct dentry *entry = NULL; + struct dentry *dir; + struct dentry *entry; parent = fuse_ilookup(fc, parent_nodeid, NULL); if (!parent) return -ENOENT; + inode_lock_nested(parent, I_MUTEX_PARENT); if (!S_ISDIR(parent->i_mode)) - goto put_parent; + goto unlock; err = -ENOENT; dir = d_find_alias(parent); if (!dir) - goto put_parent; - while (!entry) { - struct dentry *child = try_lookup_noperm(name, dir); - if (!child || IS_ERR(child)) - goto put_parent; - entry = start_removing_dentry(dir, child); - dput(child); - if (IS_ERR(entry)) - goto put_parent; - if (!d_same_name(entry, dir, name)) { - end_removing(entry); - entry = NULL; - } - } + goto unlock; + + name->hash = full_name_hash(dir, name->name, name->len); + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; fuse_dir_changed(parent); if (!(flags & FUSE_EXPIRE_ONLY)) d_invalidate(entry); fuse_invalidate_entry_cache(entry); - if (child_nodeid != 0) { + if (child_nodeid != 0 && d_really_is_positive(entry)) { inode_lock(d_inode(entry)); if (get_node_id(d_inode(entry)) != child_nodeid) { err = -ENOENT; @@ -1648,10 +1640,10 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, } else { err = 0; } + dput(entry); - end_removing(entry); - put_parent: - dput(dir); + unlock: + inode_unlock(parent); iput(parent); return err; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f94f3dc082c6..e052a0d44dee 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1,12 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> - - This program can be distributed under the terms of the GNU GPL. - See the file COPYING. 
*/ #include "fuse_i.h" +#include "dev.h" #include <linux/pagemap.h> #include <linux/slab.h> @@ -91,8 +90,7 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) return ff; } -static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args, - int error) +static void fuse_release_end(struct fuse_args *args, int error) { struct fuse_release_args *ra = container_of(args, typeof(*ra), args); @@ -112,10 +110,10 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) if (!args) { /* Do nothing when server does not implement 'opendir' */ } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) { - fuse_release_end(ff->fm, args, 0); + fuse_release_end(args, 0); } else if (sync) { fuse_simple_request(ff->fm, args); - fuse_release_end(ff->fm, args, 0); + fuse_release_end(args, 0); } else { /* * DAX inodes may need to issue a number of synchronous @@ -126,7 +124,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) args->end = fuse_release_end; if (fuse_simple_background(ff->fm, args, GFP_KERNEL | __GFP_NOFAIL)) - fuse_release_end(ff->fm, args, -ENOTCONN); + fuse_release_end(args, -ENOTCONN); } kfree(ff); } @@ -380,8 +378,14 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff, * aio and closes the fd before the aio completes. Since aio takes its * own ref to the file, the IO completion has to drop the ref, which is * how the fuse server can end up closing its clients' files. + * + * Exception is virtio-fs, which is not affected by the above (server is + * on host, cannot close open files in guest). Virtio-fs needs sync + * release, because the num_waiting mechanism to wait for all requests + * before commencing with fs shutdown doesn't work if submounts are + * used. */ - fuse_file_put(ff, false); + fuse_file_put(ff, ff->fm->fc->auto_submounts); } void fuse_release_common(struct file *file, bool isdir) @@ -635,6 +639,19 @@ static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io) return io->bytes < 0 ? 
io->size : io->bytes; } +static void fuse_aio_invalidate_worker(struct work_struct *work) +{ + struct fuse_io_priv *io = container_of(work, struct fuse_io_priv, work); + struct address_space *mapping = io->iocb->ki_filp->f_mapping; + ssize_t res = fuse_get_res_by_io(io); + pgoff_t start = io->offset >> PAGE_SHIFT; + pgoff_t end = (io->offset + res - 1) >> PAGE_SHIFT; + + invalidate_inode_pages2_range(mapping, start, end); + io->iocb->ki_complete(io->iocb, res); + kref_put(&io->refcnt, fuse_io_release); +} + /* * In case of short read, the caller sets 'pos' to the position of * actual end of fuse request in IO request. Otherwise, if bytes_requested @@ -667,10 +684,11 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) spin_unlock(&io->lock); if (!left && !io->blocking) { + struct inode *inode = file_inode(io->iocb->ki_filp); + struct address_space *mapping = io->iocb->ki_filp->f_mapping; ssize_t res = fuse_get_res_by_io(io); if (res >= 0) { - struct inode *inode = file_inode(io->iocb->ki_filp); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -679,6 +697,17 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) spin_unlock(&fi->lock); } + if (io->write && res > 0 && mapping->nrpages) { + /* + * As in generic_file_direct_write(), invalidate after the + * write, to invalidate read-ahead cache that may have competed + * with the write. 
+ */ + INIT_WORK(&io->work, fuse_aio_invalidate_worker); + queue_work(inode->i_sb->s_dio_done_wq, &io->work); + return; + } + io->iocb->ki_complete(io->iocb, res); } @@ -709,8 +738,7 @@ static void fuse_io_free(struct fuse_io_args *ia) kfree(ia); } -static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args, - int err) +static void fuse_aio_complete_req(struct fuse_args *args, int err) { struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); struct fuse_io_priv *io = ia->io; @@ -758,7 +786,7 @@ static ssize_t fuse_async_req_send(struct fuse_mount *fm, ia->ap.args.may_block = io->should_dirty; err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL); if (err) - fuse_aio_complete_req(fm, &ia->ap.args, err); + fuse_aio_complete_req(&ia->ap.args, err); return num_bytes; } @@ -902,7 +930,7 @@ static int fuse_handle_readahead(struct folio *folio, ia = NULL; } if (!ia) { - if (fc->num_background >= fc->congestion_threshold && + if (fuse_chan_num_background(fc->chan) >= fc->congestion_threshold && rac->ra->async_size >= readahead_count(rac)) /* * Congested and only async pages left, so skip the @@ -1001,8 +1029,7 @@ static int fuse_iomap_read_folio_range(const struct iomap_iter *iter, return fuse_do_readfolio(file, folio, off, len); } -static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, - int err) +static void fuse_readpages_end(struct fuse_args *args, int err) { int i; struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); @@ -1068,7 +1095,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file, res = fuse_simple_request(fm, &ap->args); err = res < 0 ? 
res : 0; } - fuse_readpages_end(fm, &ap->args, err); + fuse_readpages_end(&ap->args, err); } static void fuse_readahead(struct readahead_control *rac) @@ -1586,7 +1613,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, * manually extract pages using iov_iter_extract_pages() and then * copy that to a folios array. */ - struct page **pages = kzalloc(max_pages * sizeof(struct page *), + struct page **pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) { ret = -ENOMEM; @@ -1743,15 +1770,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (res > 0) *ppos = pos; - if (res > 0 && write && fopen_direct_io) { - /* - * As in generic_file_direct_write(), invalidate after the - * write, to invalidate read-ahead cache that may have competed - * with the write. - */ - invalidate_inode_pages2_range(mapping, idx_from, idx_to); - } - return res > 0 ? res : err; } EXPORT_SYMBOL_GPL(fuse_direct_io); @@ -1790,6 +1808,8 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); + struct address_space *mapping = inode->i_mapping; + loff_t pos = iocb->ki_pos; ssize_t res; bool exclusive; @@ -1806,6 +1826,16 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) FUSE_DIO_WRITE); fuse_write_update_attr(inode, iocb->ki_pos, res); } + if (res > 0 && mapping->nrpages) { + /* + * As in generic_file_direct_write(), invalidate after + * write, to invalidate read-ahead cache that may have + * with the write. 
+ */ + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, + (pos + res - 1) >> PAGE_SHIFT); + } } fuse_dio_unlock(iocb, exclusive); @@ -1985,8 +2015,7 @@ __acquires(fi->lock) } } -static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, - int error) +static void fuse_writepage_end(struct fuse_args *args, int error) { struct fuse_writepage_args *wpa = container_of(args, typeof(*wpa), ia.ap.args); @@ -2297,7 +2326,7 @@ static int fuse_writepages(struct address_space *mapping, return -EIO; if (wbc->sync_mode == WB_SYNC_NONE && - fc->num_background >= fc->congestion_threshold) + fuse_chan_num_background(fc->chan) >= fc->congestion_threshold) return 0; return iomap_writepages(&wpc); @@ -2571,8 +2600,9 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) struct fuse_file *ff = file->private_data; /* emulate flock with POSIX locks */ - ff->flock = true; err = fuse_setlk(file, fl, 1); + if (!err) + ff->flock = true; } return err; @@ -2683,125 +2713,6 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) return retval; } -/* - * All files which have been polled are linked to RB tree - * fuse_conn->polled_files which is indexed by kh. Walk the tree and - * find the matching one. - */ -static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, - struct rb_node **parent_out) -{ - struct rb_node **link = &fc->polled_files.rb_node; - struct rb_node *last = NULL; - - while (*link) { - struct fuse_file *ff; - - last = *link; - ff = rb_entry(last, struct fuse_file, polled_node); - - if (kh < ff->kh) - link = &last->rb_left; - else if (kh > ff->kh) - link = &last->rb_right; - else - return link; - } - - if (parent_out) - *parent_out = last; - return link; -} - -/* - * The file is about to be polled. Make sure it's on the polled_files - * RB tree. Note that files once added to the polled_files tree are - * not removed before the file is released. 
This is because a file - * polled once is likely to be polled again. - */ -static void fuse_register_polled_file(struct fuse_conn *fc, - struct fuse_file *ff) -{ - spin_lock(&fc->lock); - if (RB_EMPTY_NODE(&ff->polled_node)) { - struct rb_node **link, *parent; - - link = fuse_find_polled_node(fc, ff->kh, &parent); - BUG_ON(*link); - rb_link_node(&ff->polled_node, parent, link); - rb_insert_color(&ff->polled_node, &fc->polled_files); - } - spin_unlock(&fc->lock); -} - -__poll_t fuse_file_poll(struct file *file, poll_table *wait) -{ - struct fuse_file *ff = file->private_data; - struct fuse_mount *fm = ff->fm; - struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; - struct fuse_poll_out outarg; - FUSE_ARGS(args); - int err; - - if (fm->fc->no_poll) - return DEFAULT_POLLMASK; - - poll_wait(file, &ff->poll_wait, wait); - inarg.events = mangle_poll(poll_requested_events(wait)); - - /* - * Ask for notification iff there's someone waiting for it. - * The client may ignore the flag and always notify. - */ - if (waitqueue_active(&ff->poll_wait)) { - inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; - fuse_register_polled_file(fm->fc, ff); - } - - args.opcode = FUSE_POLL; - args.nodeid = ff->nodeid; - args.in_numargs = 1; - args.in_args[0].size = sizeof(inarg); - args.in_args[0].value = &inarg; - args.out_numargs = 1; - args.out_args[0].size = sizeof(outarg); - args.out_args[0].value = &outarg; - err = fuse_simple_request(fm, &args); - - if (!err) - return demangle_poll(outarg.revents); - if (err == -ENOSYS) { - fm->fc->no_poll = 1; - return DEFAULT_POLLMASK; - } - return EPOLLERR; -} -EXPORT_SYMBOL_GPL(fuse_file_poll); - -/* - * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and - * wakes up the poll waiters. 
- */ -int fuse_notify_poll_wakeup(struct fuse_conn *fc, - struct fuse_notify_poll_wakeup_out *outarg) -{ - u64 kh = outarg->kh; - struct rb_node **link; - - spin_lock(&fc->lock); - - link = fuse_find_polled_node(fc, kh, NULL); - if (*link) { - struct fuse_file *ff; - - ff = rb_entry(*link, struct fuse_file, polled_node); - wake_up_interruptible_sync(&ff->poll_wait); - } - - spin_unlock(&fc->lock); - return 0; -} - static void fuse_do_truncate(struct file *file) { struct inode *inode = file->f_mapping->host; @@ -2834,6 +2745,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) size_t count = iov_iter_count(iter), shortened = 0; loff_t offset = iocb->ki_pos; struct fuse_io_priv *io; + bool async = ff->fm->fc->async_dio; pos = offset; inode = file->f_mapping->host; @@ -2842,6 +2754,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if ((iov_iter_rw(iter) == READ) && (offset >= i_size)) return 0; + if ((iov_iter_rw(iter) == WRITE) && async && !inode->i_sb->s_dio_done_wq) { + ret = sb_init_dio_done_wq(inode->i_sb); + if (ret < 0) + return ret; + } + io = kmalloc_obj(struct fuse_io_priv); if (!io) return -ENOMEM; @@ -2857,7 +2775,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) * By default, we want to optimize all I/Os with async request * submission to the client filesystem if supported. 
*/ - io->async = ff->fm->fc->async_dio; + io->async = async; io->iocb = iocb; io->blocking = is_sync_kiocb(iocb); diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 910f883cd090..668c8391d61c 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -6,20 +6,311 @@ #ifndef _FS_FUSE_DEV_I_H #define _FS_FUSE_DEV_I_H +#include <linux/fuse.h> #include <linux/types.h> +#include <linux/refcount.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <linux/fs.h> /* Ordinary requests have even IDs, while interrupts IDs are odd */ #define FUSE_INT_REQ_BIT (1ULL << 0) #define FUSE_REQ_ID_STEP (1ULL << 1) -extern struct wait_queue_head fuse_dev_waitq; - struct fuse_arg; struct fuse_args; struct fuse_pqueue; -struct fuse_req; struct fuse_iqueue; -struct fuse_forget_link; + +/** + * enum fuse_req_flag - Request flags + * + * @FR_ISREPLY: set if the request has reply + * @FR_FORCE: force sending of the request even if interrupted + * @FR_BACKGROUND: request is sent in the background + * @FR_WAITING: request is counted as "waiting" + * @FR_ABORTED: the request was aborted + * @FR_INTERRUPTED: the request has been interrupted + * @FR_LOCKED: data is being copied to/from the request + * @FR_PENDING: request is not yet in userspace + * @FR_SENT: request is in userspace, waiting for an answer + * @FR_FINISHED: request is finished + * @FR_PRIVATE: request is on private list + * @FR_ASYNC: request is asynchronous + * @FR_URING: request is handled through fuse-io-uring + */ +enum fuse_req_flag { + FR_ISREPLY, + FR_FORCE, + FR_BACKGROUND, + FR_WAITING, + FR_ABORTED, + FR_INTERRUPTED, + FR_LOCKED, + FR_PENDING, + FR_SENT, + FR_FINISHED, + FR_PRIVATE, + FR_ASYNC, + FR_URING, +}; + +/** + * struct fuse_req - A request to the client + * + * .waitq.lock protects the following fields: + * - FR_ABORTED + * - FR_LOCKED (may also be modified under fpq->lock, tested under both) + */ +struct fuse_req { + /** + * @list: This can be on either pending processing or io 
lists in + * fuse_conn + */ + struct list_head list; + + /** @intr_entry: Entry on the interrupts list */ + struct list_head intr_entry; + + /** @args: Input/output arguments */ + struct fuse_args *args; + + /** @count: refcount */ + refcount_t count; + + /** @flags: Request flags, updated with test/set/clear_bit() */ + unsigned long flags; + + /** @in: The request input header */ + struct { + /** @in.h: The request input header */ + struct fuse_in_header h; + } in; + + /** @out: The request output header */ + struct { + /** @out.h: The request output header */ + struct fuse_out_header h; + } out; + + /** @waitq: Used to wake up the task waiting for completion of request */ + wait_queue_head_t waitq; + +#if IS_ENABLED(CONFIG_VIRTIO_FS) + /** + * @argbuf: virtio-fs's physically contiguous buffer for in and out + * args + */ + void *argbuf; +#endif + + /** @chan: fuse_chan this request belongs to */ + struct fuse_chan *chan; + +#ifdef CONFIG_FUSE_IO_URING + void *ring_entry; + void *ring_queue; +#endif + /** @create_time: When (in jiffies) the request was created */ + unsigned long create_time; +}; + +/* One forget request */ +struct fuse_forget_link { + struct fuse_forget_one forget_one; + struct fuse_forget_link *next; +}; + +/** + * struct fuse_iqueue_ops - Input queue callbacks + * + * Input queue signalling is device-specific. For example, the /dev/fuse file + * uses fiq->waitq and fasync to wake processes that are waiting on queue + * readiness. These callbacks allow other device types to respond to input + * queue activity. 
+ */ +struct fuse_iqueue_ops { + /** + * @send_forget: Send one forget + */ + void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link); + + /** + * @send_interrupt: Send interrupt for request + */ + void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req); + + /** + * @send_req: Send one request + */ + void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req); + + /** + * @release: Clean up when fuse_iqueue is destroyed + */ + void (*release)(struct fuse_iqueue *fiq); +}; + +struct fuse_iqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accesses to members of this structure */ + spinlock_t lock; + + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The next unique request id */ + u64 reqctr; + + /** The list of pending requests */ + struct list_head pending; + + /** Pending interrupts */ + struct list_head interrupts; + + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + + /** O_ASYNC requests */ + struct fasync_struct *fasync; + + /** Device-specific callbacks */ + const struct fuse_iqueue_ops *ops; + + /** Device-specific state */ + void *priv; +}; + +struct fuse_chan { + /** Lock protecting: + - devices + - connected + - ring + - ring->queues[qid] + */ + spinlock_t lock; + + /* back pointer: fc->chan->conn == fc */ + struct fuse_conn *conn; + + /** Input queue */ + struct fuse_iqueue iq; + + /** List of device instances belonging to this connection */ + struct list_head devices; + + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of requests currently in the background */ + unsigned num_background; + + /** Number of background requests currently queued for userspace */ + unsigned active_background; + + /** The list of background requests 
set aside for later queuing */ + struct list_head bg_queue; + + /** Protects: max_background, num_background, active_background, bg_queue, blocked */ + spinlock_t bg_lock; + + /** Flag indicating that INIT reply has been received. Allocating + * any fuse request will be suspended until the flag is set */ + int initialized; + + /** Flag indicating if connection is blocked. This will be + the case before the INIT reply is received, and if there + are too many outstading backgrounds requests */ + int blocked; + + /** waitq for blocked connection */ + wait_queue_head_t blocked_waitq; + + /** Connection established, cleared on umount, connection + abort and device release */ + unsigned connected; + + /** The number of requests waiting for completion */ + atomic_t num_waiting; + + /** Is interrupt not implemented by fs? */ + bool no_interrupt; + + /* Use io_uring for communication */ + unsigned int io_uring; + + /* Negotiated minor version */ + unsigned int minor; + + /* Maximum write size */ + unsigned int max_write; + + /* Maximum number of pages that can be used in a single request */ + unsigned int max_pages; + + /* Before being installed into fud, contains the preallocated pq array*/ + struct list_head *pq_prealloc; + + /** Connection aborted via sysfs, respond with ECONNABORTED on device I/O */ + bool abort_with_err; + +#ifdef CONFIG_FUSE_IO_URING + /** uring connection information*/ + struct fuse_ring *ring; +#endif + + /** Only used if the connection opts into request timeouts */ + struct { + /* Worker for checking if any requests have timed out */ + struct delayed_work work; + + /* Request timeout (in jiffies). 
0 = no timeout */ + unsigned int req_timeout; + } timeout; +}; + +#define FUSE_PQ_HASH_BITS 8 +#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS) + +struct fuse_pqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accessess to members of this structure */ + spinlock_t lock; + + /** Hash table of requests being processed */ + struct list_head *processing; + + /** The list of requests under I/O */ + struct list_head io; +}; + +/** + * struct fuse_dev - Fuse device instance + */ +struct fuse_dev { + /** @ref: Reference count of this object */ + refcount_t ref; + + /** @sync_init: Issue FUSE_INIT synchronously */ + bool sync_init; + + /** @chan: Fuse channel for this device */ + struct fuse_chan *chan; + + /** @pq: Processing queue */ + struct fuse_pqueue pq; + + /** @entry: list entry on fch->devices */ + struct list_head entry; +}; struct fuse_copy_state { struct fuse_req *req; @@ -39,21 +330,21 @@ struct fuse_copy_state { } ring; }; -/* fud->fc gets assigned to this value when /dev/fuse is closed */ -#define FUSE_DEV_FC_DISCONNECTED ((struct fuse_conn *) 1) +/* fud->chan gets assigned to this value when /dev/fuse is closed */ +#define FUSE_DEV_CHAN_DISCONNECTED ((struct fuse_chan *) 1) /* - * Lockless access is OK, because fud->fc is set once during mount and is valid + * Lockless access is OK, because fud->chan is set once during mount and is valid * until the file is released. * - * fud->fc is set to FUSE_DEV_FC_DISCONNECTED only after the containing file is + * fud->chan is set to FUSE_DEV_CHAN_DISCONNECTED only after the containing file is * released, so result is safe to dereference in most cases. Exceptions are: * fuse_dev_put() and fuse_fill_super_common(). 
*/ -static inline struct fuse_conn *fuse_dev_fc_get(struct fuse_dev *fud) +static inline struct fuse_chan *fuse_dev_chan_get(struct fuse_dev *fud) { /* Pairs with xchg() in fuse_dev_install() */ - return smp_load_acquire(&fud->fc); + return smp_load_acquire(&fud->chan); } static inline struct fuse_dev *fuse_file_to_fud(struct file *file) @@ -65,22 +356,29 @@ static inline struct fuse_dev *__fuse_get_dev(struct file *file) { struct fuse_dev *fud = fuse_file_to_fud(file); - if (!fuse_dev_fc_get(fud)) + if (!fuse_dev_chan_get(fud)) return NULL; return fud; } +void fuse_iqueue_init(struct fuse_iqueue *fiq, const struct fuse_iqueue_ops *ops, void *priv); + struct fuse_dev *fuse_get_dev(struct file *file); unsigned int fuse_req_hash(u64 unique); struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique); void fuse_dev_end_requests(struct list_head *head); +void fuse_request_bg_finish(struct fuse_chan *fch, struct fuse_req *req); void fuse_copy_init(struct fuse_copy_state *cs, bool write, struct iov_iter *iter); -void fuse_copy_finish(struct fuse_copy_state *cs); +/* + * Return the number of bytes in an arguments list + */ +unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); + int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs, unsigned int argpages, struct fuse_arg *args, int zeroing); @@ -91,7 +389,34 @@ void fuse_dev_queue_forget(struct fuse_iqueue *fiq, void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock); -bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list); +bool fuse_request_expired(struct fuse_chan *fch, struct list_head *list); + +/* + * Assign a unique id to a fuse request + */ +void fuse_request_assign_unique(struct fuse_iqueue *fiq, struct fuse_req *req); + +/* + * Get the next unique ID for a request + */ +u64 fuse_get_unique(struct fuse_iqueue *fiq); + +struct fuse_dev 
*fuse_dev_alloc_install(struct fuse_chan *fch); +struct fuse_dev *fuse_dev_alloc(void); + +int fuse_dev_release(struct inode *inode, struct file *file); + +struct list_head *fuse_pqueue_alloc(void); + +/* + * Initialize the fuse processing queue + */ +void fuse_pqueue_init(struct fuse_pqueue *fpq); + +/* + * End a finished request + */ +void fuse_request_end(struct fuse_req *req); #endif diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 17423d4e3cfa..85f738c53122 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1,9 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> - - This program can be distributed under the terms of the GNU GPL. - See the file COPYING. */ #ifndef _FS_FUSE_I_H @@ -13,6 +11,7 @@ # define pr_fmt(fmt) "fuse: " fmt #endif +#include "args.h" #include <linux/fuse.h> #include <linux/fs.h> #include <linux/mount.h> @@ -48,12 +47,6 @@ /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 -/* Frequency (in seconds) of request timeout checks, if opted into */ -#define FUSE_TIMEOUT_TIMER_FREQ 15 - -/** Frequency (in jiffies) of request timeout checks, if opted into */ -extern const unsigned long fuse_timeout_timer_freq; - /* * Dentries invalidation workqueue period, in seconds. The value of this * parameter shall be >= FUSE_DENTRY_INVAL_FREQ_MIN seconds, or 0 (zero), in @@ -63,16 +56,6 @@ extern unsigned inval_wq __read_mostly; /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; -/* - * Default timeout (in seconds) for the server to reply to a request - * before the connection is aborted, if no timeout was specified on mount. - */ -extern unsigned int fuse_default_req_timeout; -/* - * Max timeout (in seconds) for the server to reply to a request before - * the connection is aborted. 
- */ -extern unsigned int fuse_max_req_timeout; /** List of active connections */ extern struct list_head fuse_conn_list; @@ -84,143 +67,173 @@ extern struct mutex fuse_mutex; extern unsigned int max_user_bgreq; extern unsigned int max_user_congthresh; -/* One forget request */ -struct fuse_forget_link { - struct fuse_forget_one forget_one; - struct fuse_forget_link *next; -}; +struct fuse_forget_link; -/* Submount lookup tracking */ +/** + * struct fuse_submount_lookup - Submount lookup tracking + */ struct fuse_submount_lookup { - /** Refcount */ + /** @count: Refcount */ refcount_t count; - /** Unique ID, which identifies the inode between userspace - * and kernel */ + /** + * @nodeid: Unique ID, which identifies the inode between userspace + * and kernel + */ u64 nodeid; - /** The request used for sending the FORGET message */ + /** @forget: The request used for sending the FORGET message */ struct fuse_forget_link *forget; }; -/** Container for data related to mapping to backing file */ +/* Container for data related to mapping to backing file */ struct fuse_backing { struct file *file; - struct cred *cred; + const struct cred *cred; - /** refcount */ + /* refcount */ refcount_t count; struct rcu_head rcu; }; -/** FUSE inode */ +/** + * struct fuse_inode - FUSE inode + */ struct fuse_inode { - /** Inode data */ + /** @inode: Inode data */ struct inode inode; - /** Unique ID, which identifies the inode between userspace - * and kernel */ + /** + * @nodeid: Unique ID, which identifies the inode between userspace + * and kernel + */ u64 nodeid; - /** Number of lookups on this inode */ + /** @nlookup: Number of lookups on this inode */ u64 nlookup; - /** The request used for sending the FORGET message */ + /** @forget: The request used for sending the FORGET message */ struct fuse_forget_link *forget; - /** Time in jiffies until the file attributes are valid */ + /** @i_time: Time in jiffies until the file attributes are valid */ u64 i_time; - /* Which attributes 
are invalid */ + /** @inval_mask: Which attributes are invalid */ u32 inval_mask; - /** The sticky bit in inode->i_mode may have been removed, so - preserve the original mode */ + /** + * @orig_i_mode: The sticky bit in inode->i_mode may have been removed, + * so preserve the original mode + */ umode_t orig_i_mode; - /* Cache birthtime */ + /** @i_btime: Cache birthtime */ struct timespec64 i_btime; - /** 64 bit inode number */ + /** @orig_ino: 64-bit inode number */ u64 orig_ino; - /** Version of last attribute change */ + /** @attr_version: Version of last attribute change */ u64 attr_version; union { /* read/write io cache (regular file only) */ struct { - /* Files usable in writepage. Protected by fi->lock */ + /** + * @write_files: Files usable in writepage. + * Protected by fi->lock + */ struct list_head write_files; - /* Writepages pending on truncate or fsync */ + /** + * @queued_writes: Writepages pending on truncate or + * fsync + */ struct list_head queued_writes; - /* Number of sent writes, a negative bias - * (FUSE_NOWRITE) means more writes are blocked */ + /** + * @writectr: Number of sent writes, a negative bias + * (FUSE_NOWRITE) means more writes are blocked + */ int writectr; - /** Number of files/maps using page cache */ + /** @iocachectr: Number of files/maps using page cache */ int iocachectr; - /* Waitq for writepage completion */ + /** @page_waitq: Waitq for writepage completion */ wait_queue_head_t page_waitq; - /* waitq for direct-io completion */ + /** @direct_io_waitq: waitq for direct-io completion */ wait_queue_head_t direct_io_waitq; }; - /* readdir cache (directory only) */ + /** @rdc: readdir cache (directory only) */ struct { - /* true if fully cached */ + /** @cached: true if fully cached */ bool cached; - /* size of cache */ + /** @size: size of cache */ loff_t size; - /* position at end of cache (position of next entry) */ + /** + * @pos: position at end of cache (position of next + * entry) + */ loff_t pos; - /* version of the 
cache */ + /** @version: version of the cache */ u64 version; - /* modification time of directory when cache was - * started */ + /** + * @mtime: modification time of directory when cache was + * started + */ struct timespec64 mtime; - /* iversion of directory when cache was started */ + /** + * @epoch: epoch of fc when cache was started + */ + int epoch; + + /** + * @iversion: iversion of directory when cache was + * started + */ u64 iversion; - /* protects above fields */ + /** @lock: protects above fields */ spinlock_t lock; } rdc; }; - /** Miscellaneous bits describing inode state */ + /** @state: Miscellaneous bits describing inode state */ unsigned long state; - /** Lock for serializing lookup and readdir for back compatibility*/ + /** + * @mutex: Lock for serializing lookup and readdir for back + * compatibility + */ struct mutex mutex; - /** Lock to protect write related fields */ + /** @lock: Lock to protect write-related fields */ spinlock_t lock; #ifdef CONFIG_FUSE_DAX - /* - * Dax specific inode data + /** + * @dax: Dax specific inode data */ struct fuse_inode_dax *dax; #endif - /** Submount specific lookup tracking */ + /** @submount_lookup: Submount specific lookup tracking */ struct fuse_submount_lookup *submount_lookup; #ifdef CONFIG_FUSE_PASSTHROUGH - /** Reference to backing file in passthrough mode */ + /** @fb: Reference to backing file in passthrough mode */ struct fuse_backing *fb; #endif - /* - * The underlying inode->i_blkbits value will not be modified, - * so preserve the blocksize specified by the server. + /** + * @cached_i_blkbits: The underlying inode->i_blkbits value will not + * be modified, so preserve the blocksize specified by the server. 
*/ u8 cached_i_blkbits; }; @@ -250,116 +263,67 @@ struct fuse_conn; struct fuse_mount; union fuse_file_args; -/** FUSE specific file data */ +/** + * struct fuse_file - FUSE-specific file data + */ struct fuse_file { - /** Fuse connection for this file */ + /** @fm: Fuse connection for this file */ struct fuse_mount *fm; - /* Argument space reserved for open/release */ + /** @args: Argument space reserved for open/release */ union fuse_file_args *args; - /** Kernel file handle guaranteed to be unique */ + /** @kh: Kernel file handle guaranteed to be unique */ u64 kh; - /** File handle used by userspace */ + /** @fh: File handle used by userspace */ u64 fh; - /** Node id of this file */ + /** @nodeid: Node id of this file */ u64 nodeid; - /** Refcount */ + /** @count: Refcount */ refcount_t count; - /** FOPEN_* flags returned by open */ + /** @open_flags: FOPEN_* flags returned by open */ u32 open_flags; - /** Entry on inode's write_files list */ + /** @write_entry: Entry on inode's write_files list */ struct list_head write_entry; - /* Readdir related */ + /** @readdir: Readdir-related */ struct { - /* Dir stream position */ + /** @pos: Dir stream position */ loff_t pos; - /* Offset in cache */ + /** @cache_off: Offset in cache */ loff_t cache_off; - /* Version of cache we are reading */ + /** @version: Version of cache we are reading */ u64 version; } readdir; - /** RB node to be linked on fuse_conn->polled_files */ + /** @polled_node: RB node to be linked on fuse_conn->polled_files */ struct rb_node polled_node; - /** Wait queue head for poll */ + /** @poll_wait: Wait queue head for poll */ wait_queue_head_t poll_wait; - /** Does file hold a fi->iocachectr refcount? */ + /** @iomode: Does file hold a fi->iocachectr refcount? 
*/ enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode; #ifdef CONFIG_FUSE_PASSTHROUGH - /** Reference to backing file in passthrough mode */ + /** @passthrough: Reference to backing file in passthrough mode */ struct file *passthrough; + /** @cred: passthrough file credentials */ const struct cred *cred; #endif - /** Has flock been performed on this file? */ + /** @flock: Has flock been performed on this file? */ bool flock:1; }; -/** One input argument of a request */ -struct fuse_in_arg { - unsigned size; - const void *value; -}; - -/** One output argument of a request */ -struct fuse_arg { - unsigned size; - void *value; -}; - -/** FUSE folio descriptor */ -struct fuse_folio_desc { - unsigned int length; - unsigned int offset; -}; - -struct fuse_args { - uint64_t nodeid; - uint32_t opcode; - uint8_t in_numargs; - uint8_t out_numargs; - uint8_t ext_idx; - bool force:1; - bool noreply:1; - bool nocreds:1; - bool in_pages:1; - bool out_pages:1; - bool user_pages:1; - bool out_argvar:1; - bool page_zeroing:1; - bool page_replace:1; - bool may_block:1; - bool is_ext:1; - bool is_pinned:1; - bool invalidate_vmap:1; - bool abort_on_kill:1; - struct fuse_in_arg in_args[4]; - struct fuse_arg out_args[2]; - void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); - /* Used for kvec iter backed by vmalloc address */ - void *vmap_base; -}; - -struct fuse_args_pages { - struct fuse_args args; - struct folio **folios; - struct fuse_folio_desc *descs; - unsigned int num_folios; -}; - struct fuse_release_args { struct fuse_args args; struct fuse_release_in inarg; @@ -378,6 +342,7 @@ union fuse_file_args { /** The request IO state (for asynchronous processing) */ struct fuse_io_priv { struct kref refcnt; + struct work_struct work; int async; spinlock_t lock; unsigned reqs; @@ -399,200 +364,6 @@ struct fuse_io_priv { .iocb = i, \ } -/** - * Request flags - * - * FR_ISREPLY: set if the request has reply - * FR_FORCE: force sending of the request even if interrupted - 
* FR_BACKGROUND: request is sent in the background - * FR_WAITING: request is counted as "waiting" - * FR_ABORTED: the request was aborted - * FR_INTERRUPTED: the request has been interrupted - * FR_LOCKED: data is being copied to/from the request - * FR_PENDING: request is not yet in userspace - * FR_SENT: request is in userspace, waiting for an answer - * FR_FINISHED: request is finished - * FR_PRIVATE: request is on private list - * FR_ASYNC: request is asynchronous - * FR_URING: request is handled through fuse-io-uring - */ -enum fuse_req_flag { - FR_ISREPLY, - FR_FORCE, - FR_BACKGROUND, - FR_WAITING, - FR_ABORTED, - FR_INTERRUPTED, - FR_LOCKED, - FR_PENDING, - FR_SENT, - FR_FINISHED, - FR_PRIVATE, - FR_ASYNC, - FR_URING, -}; - -/** - * A request to the client - * - * .waitq.lock protects the following fields: - * - FR_ABORTED - * - FR_LOCKED (may also be modified under fc->lock, tested under both) - */ -struct fuse_req { - /** This can be on either pending processing or io lists in - fuse_conn */ - struct list_head list; - - /** Entry on the interrupts list */ - struct list_head intr_entry; - - /* Input/output arguments */ - struct fuse_args *args; - - /** refcount */ - refcount_t count; - - /* Request flags, updated with test/set/clear_bit() */ - unsigned long flags; - - /* The request input header */ - struct { - struct fuse_in_header h; - } in; - - /* The request output header */ - struct { - struct fuse_out_header h; - } out; - - /** Used to wake up the task waiting for completion of request*/ - wait_queue_head_t waitq; - -#if IS_ENABLED(CONFIG_VIRTIO_FS) - /** virtio-fs's physically contiguous buffer for in and out args */ - void *argbuf; -#endif - - /** fuse_mount this request belongs to */ - struct fuse_mount *fm; - -#ifdef CONFIG_FUSE_IO_URING - void *ring_entry; - void *ring_queue; -#endif - /** When (in jiffies) the request was created */ - unsigned long create_time; -}; - -struct fuse_iqueue; - -/** - * Input queue callbacks - * - * Input queue 
signalling is device-specific. For example, the /dev/fuse file - * uses fiq->waitq and fasync to wake processes that are waiting on queue - * readiness. These callbacks allow other device types to respond to input - * queue activity. - */ -struct fuse_iqueue_ops { - /** - * Send one forget - */ - void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link); - - /** - * Send interrupt for request - */ - void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req); - - /** - * Send one request - */ - void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req); - - /** - * Clean up when fuse_iqueue is destroyed - */ - void (*release)(struct fuse_iqueue *fiq); -}; - -/** /dev/fuse input queue operations */ -extern const struct fuse_iqueue_ops fuse_dev_fiq_ops; - -struct fuse_iqueue { - /** Connection established */ - unsigned connected; - - /** Lock protecting accesses to members of this structure */ - spinlock_t lock; - - /** Readers of the connection are waiting on this */ - wait_queue_head_t waitq; - - /** The next unique request id */ - u64 reqctr; - - /** The list of pending requests */ - struct list_head pending; - - /** Pending interrupts */ - struct list_head interrupts; - - /** Queue of pending forgets */ - struct fuse_forget_link forget_list_head; - struct fuse_forget_link *forget_list_tail; - - /** Batching of FORGET requests (positive indicates FORGET batch) */ - int forget_batch; - - /** O_ASYNC requests */ - struct fasync_struct *fasync; - - /** Device-specific callbacks */ - const struct fuse_iqueue_ops *ops; - - /** Device-specific state */ - void *priv; -}; - -#define FUSE_PQ_HASH_BITS 8 -#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS) - -struct fuse_pqueue { - /** Connection established */ - unsigned connected; - - /** Lock protecting accessess to members of this structure */ - spinlock_t lock; - - /** Hash table of requests being processed */ - struct list_head *processing; - - /** The list of requests under I/O */ - 
struct list_head io; -}; - -/** - * Fuse device instance - */ -struct fuse_dev { - /** Reference count of this object */ - refcount_t ref; - - /** Issue FUSE_INIT synchronously */ - bool sync_init; - - /** Fuse connection for this device */ - struct fuse_conn *fc; - - /** Processing queue */ - struct fuse_pqueue pq; - - /** list entry on fc->devices */ - struct list_head entry; -}; - enum fuse_dax_mode { FUSE_DAX_INODE_DEFAULT, /* default */ FUSE_DAX_ALWAYS, /* "-o dax=always" */ @@ -637,133 +408,135 @@ struct fuse_sync_bucket { }; /** - * A Fuse connection. + * struct fuse_conn - A Fuse connection. * * This structure is created, when the root filesystem is mounted, and * is destroyed, when the client device is closed and the last * fuse_mount is destroyed. */ struct fuse_conn { - /** Lock protecting accessess to members of this structure */ + /** + * @lock: Lock protecting: + * - polled_files + * - backing_files_map + * - curr_bucket + */ spinlock_t lock; - /** Refcount */ + /** @count: Refcount */ refcount_t count; - /** Current epoch for up-to-date dentries */ + /** @epoch: Current epoch for up-to-date dentries */ atomic_t epoch; + /** @epoch_work: Used to invalidate dentries from old epochs */ struct work_struct epoch_work; + /** @rcu: Used to delay freeing fuse_conn, making it safe */ struct rcu_head rcu; - /** The user id for this mount */ + /** @user_id: The user id for this mount */ kuid_t user_id; - /** The group id for this mount */ + /** @group_id: The group id for this mount */ kgid_t group_id; - /** The pid namespace for this mount */ + /** @pid_ns: The pid namespace for this mount */ struct pid_namespace *pid_ns; - /** The user namespace for this mount */ + /** @user_ns: The user namespace for this mount */ struct user_namespace *user_ns; - /** Maximum read size */ + /** @max_read: Maximum read size */ unsigned max_read; - /** Maximum write size */ + /** @max_write: Maximum write size */ unsigned max_write; - /** Maximum number of pages that can be 
used in a single request */ + /** + * @max_pages: Maximum number of pages that can be used in a + * single request + */ unsigned int max_pages; - /** Constrain ->max_pages to this value during feature negotiation */ + /** + * @max_pages_limit: Constrain ->max_pages to this value during + * feature negotiation + */ unsigned int max_pages_limit; - /** Input queue */ - struct fuse_iqueue iq; + /** @chan: transport layer object */ + struct fuse_chan *chan; - /** The next unique kernel file handle */ + /** @khctr: The next unique kernel file handle */ atomic64_t khctr; - /** rbtree of fuse_files waiting for poll events indexed by ph */ + /** + * @polled_files: rbtree of fuse_files waiting for poll events + * indexed by ph + */ struct rb_root polled_files; - /** Maximum number of outstanding background requests */ - unsigned max_background; - - /** Number of background requests at which congestion starts */ + /** + * @congestion_threshold: Number of background requests at which + * congestion starts + */ unsigned congestion_threshold; - /** Number of requests currently in the background */ - unsigned num_background; - - /** Number of background requests currently queued for userspace */ - unsigned active_background; - - /** The list of background requests set aside for later queuing */ - struct list_head bg_queue; - - /** Protects: max_background, congestion_threshold, num_background, - * active_background, bg_queue, blocked */ - spinlock_t bg_lock; - - /** Flag indicating that INIT reply has been received. Allocating - * any fuse request will be suspended until the flag is set */ - int initialized; - - /** Flag indicating if connection is blocked. 
This will be - the case before the INIT reply is received, and if there - are too many outstading backgrounds requests */ - int blocked; - - /** waitq for blocked connection */ - wait_queue_head_t blocked_waitq; - - /** Connection established, cleared on umount, connection - abort and device release */ - unsigned connected; - - /** Connection aborted via sysfs */ - bool aborted; - - /** Connection failed (version mismatch). Cannot race with - setting other bitfields since it is only set once in INIT - reply, before any other request, and never cleared */ + /** + * @conn_error: Connection failed (version mismatch). Cannot race with + * setting other bitfields since it is only set once in INIT + * reply, before any other request, and never cleared + */ unsigned conn_error:1; - /** Connection successful. Only set in INIT */ + /** @conn_init: Connection successful. Only set in INIT */ unsigned conn_init:1; - /** Do readahead asynchronously? Only set in INIT */ + /** @async_read: Do readahead asynchronously? Only set in INIT */ unsigned async_read:1; - /** Return an unique read error after abort. Only set in INIT */ + /** + * @abort_err: Return an unique read error after abort. + * Only set in INIT + */ unsigned abort_err:1; - /** Do not send separate SETATTR request before open(O_TRUNC) */ + /** + * @atomic_o_trunc: Do not send separate SETATTR request before + * open(O_TRUNC) + */ unsigned atomic_o_trunc:1; - /** Filesystem supports NFS exporting. Only set in INIT */ + /** + * @export_support: Filesystem supports NFS exporting. 
+ * Only set in INIT + */ unsigned export_support:1; - /** write-back cache policy (default is write-through) */ + /** @writeback_cache: write-back cache policy (default is write-through) */ unsigned writeback_cache:1; - /** allow parallel lookups and readdir (default is serialized) */ + /** + * @parallel_dirops: allow parallel lookups and readdir (default is + * serialized) + */ unsigned parallel_dirops:1; - /** handle fs handles killing suid/sgid/cap on write/chown/trunc */ + /** + * @handle_killpriv: handle fs handles killing suid/sgid/cap on + * write/chown/trunc + */ unsigned handle_killpriv:1; - /** cache READLINK responses in page cache */ + /** @cache_symlinks: cache READLINK responses in page cache */ unsigned cache_symlinks:1; - /* show legacy mount options */ + /** @legacy_opts_show: show legacy mount options */ unsigned int legacy_opts_show:1; - /* + /** + * @handle_killpriv_v2: * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on * write/trunc only if caller did not have CAP_FSETID. sgid is killed * on write/truncate only if caller did not have CAP_FSETID as well as @@ -776,224 +549,219 @@ struct fuse_conn { * and hence races in setting them will not cause malfunction */ - /** Is open/release not implemented by fs? */ + /** @no_open: Is open/release not implemented by fs? */ unsigned no_open:1; - /** Is opendir/releasedir not implemented by fs? */ + /** @no_opendir: Is opendir/releasedir not implemented by fs? */ unsigned no_opendir:1; - /** Is fsync not implemented by fs? */ + /** @no_fsync: Is fsync not implemented by fs? */ unsigned no_fsync:1; - /** Is fsyncdir not implemented by fs? */ + /** @no_fsyncdir: Is fsyncdir not implemented by fs? */ unsigned no_fsyncdir:1; - /** Is flush not implemented by fs? */ + /** @no_flush: Is flush not implemented by fs? */ unsigned no_flush:1; - /** Is setxattr not implemented by fs? */ + /** @no_setxattr: Is setxattr not implemented by fs? 
*/ unsigned no_setxattr:1; - /** Does file server support extended setxattr */ + /** @setxattr_ext: Does file server support extended setxattr */ unsigned setxattr_ext:1; - /** Is getxattr not implemented by fs? */ + /** @no_getxattr: Is getxattr not implemented by fs? */ unsigned no_getxattr:1; - /** Is listxattr not implemented by fs? */ + /** @no_listxattr: Is listxattr not implemented by fs? */ unsigned no_listxattr:1; - /** Is removexattr not implemented by fs? */ + /** @no_removexattr: Is removexattr not implemented by fs? */ unsigned no_removexattr:1; - /** Are posix file locking primitives not implemented by fs? */ + /** @no_lock: Are posix file locking primitives not implemented by fs? */ unsigned no_lock:1; - /** Is access not implemented by fs? */ + /** @no_access: Is access not implemented by fs? */ unsigned no_access:1; - /** Is create not implemented by fs? */ + /** @no_create: Is create not implemented by fs? */ unsigned no_create:1; - /** Is interrupt not implemented by fs? */ - unsigned no_interrupt:1; - - /** Is bmap not implemented by fs? */ + /** @no_bmap: Is bmap not implemented by fs? */ unsigned no_bmap:1; - /** Is poll not implemented by fs? */ + /** @no_poll: Is poll not implemented by fs? */ unsigned no_poll:1; - /** Do multi-page cached writes */ + /** @big_writes: Do multi-page cached writes */ unsigned big_writes:1; - /** Don't apply umask to creation modes */ + /** @dont_mask: Don't apply umask to creation modes */ unsigned dont_mask:1; - /** Are BSD file locking primitives not implemented by fs? */ + /** @no_flock: Are BSD file locking primitives not implemented by fs? */ unsigned no_flock:1; - /** Is fallocate not implemented by fs? */ + /** @no_fallocate: Is fallocate not implemented by fs? */ unsigned no_fallocate:1; - /** Is rename with flags implemented by fs? */ + /** @no_rename2: Is rename with flags implemented by fs? */ unsigned no_rename2:1; - /** Use enhanced/automatic page cache invalidation. 
*/ + /** @auto_inval_data: Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; - /** Filesystem is fully responsible for page cache invalidation. */ + /** + * @explicit_inval_data: Filesystem is fully responsible for page cache + * invalidation. + */ unsigned explicit_inval_data:1; - /** Does the filesystem support readdirplus? */ + /** @do_readdirplus: Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; - /** Does the filesystem want adaptive readdirplus? */ + /** @readdirplus_auto: Does the filesystem want adaptive readdirplus? */ unsigned readdirplus_auto:1; - /** Does the filesystem support asynchronous direct-IO submission? */ + /** + * @async_dio: Does the filesystem support asynchronous direct-IO + * submission? + */ unsigned async_dio:1; - /** Is lseek not implemented by fs? */ + /** @no_lseek: Is lseek not implemented by fs? */ unsigned no_lseek:1; - /** Does the filesystem support posix acls? */ + /** @posix_acl: Does the filesystem support posix acls? */ unsigned posix_acl:1; - /** Check permissions based on the file mode or not? */ + /** + * @default_permissions: Check permissions based on the file mode + * or not? + */ unsigned default_permissions:1; - /** Allow other than the mounter user to access the filesystem ? */ + /** + * @allow_other: Allow other than the mounter user to access the + * filesystem ? + */ unsigned allow_other:1; - /** Does the filesystem support copy_file_range? */ + /** @no_copy_file_range: Does the filesystem support copy_file_range? */ unsigned no_copy_file_range:1; - /** Does the filesystem support copy_file_range_64? */ + /** + * @no_copy_file_range_64: Does the filesystem support + * copy_file_range_64? 
+ */ unsigned no_copy_file_range_64:1; - /* Send DESTROY request */ + /** @destroy: Send DESTROY request */ unsigned int destroy:1; - /* Delete dentries that have gone stale */ + /** @delete_stale: Delete dentries that have gone stale */ unsigned int delete_stale:1; - /** Do not create entry in fusectl fs */ + /** @no_control: Do not create entry in fusectl fs */ unsigned int no_control:1; - /** Do not allow MNT_FORCE umount */ + /** @no_force_umount: Do not allow MNT_FORCE umount */ unsigned int no_force_umount:1; - /* Auto-mount submounts announced by the server */ + /** @auto_submounts: Auto-mount submounts announced by the server */ unsigned int auto_submounts:1; - /* Propagate syncfs() to server */ + /** @sync_fs: Propagate syncfs() to server */ unsigned int sync_fs:1; - /* Initialize security xattrs when creating a new inode */ + /** @init_security: Initialize security xattrs when creating a new inode */ unsigned int init_security:1; - /* Add supplementary group info when creating a new inode */ + /** + * @create_supp_group: Add supplementary group info when creating + * a new inode + */ unsigned int create_supp_group:1; - /* Does the filesystem support per inode DAX? */ + /** @inode_dax: Does the filesystem support per inode DAX? */ unsigned int inode_dax:1; - /* Is tmpfile not implemented by fs? */ + /** @no_tmpfile: Is tmpfile not implemented by fs? */ unsigned int no_tmpfile:1; - /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ + /** + * @direct_io_allow_mmap: Relax restrictions to allow shared mmap + * in FOPEN_DIRECT_IO mode + */ unsigned int direct_io_allow_mmap:1; - /* Is statx not implemented by fs? */ + /** @no_statx: Is statx not implemented by fs? 
*/ unsigned int no_statx:1; - /** Passthrough support for read/write IO */ + /** @passthrough: Passthrough support for read/write IO */ unsigned int passthrough:1; - /* Use pages instead of pointer for kernel I/O */ + /** @use_pages_for_kvec_io: Use pages instead of pointer for kernel I/O */ unsigned int use_pages_for_kvec_io:1; - /* Is link not implemented by fs? */ + /** @no_link: Is link not implemented by fs? */ unsigned int no_link:1; - /* Is synchronous FUSE_INIT allowed? */ + /** @sync_init: Is synchronous FUSE_INIT allowed? */ unsigned int sync_init:1; - /* Use io_uring for communication */ - unsigned int io_uring; - - /** Maximum stack depth for passthrough backing files */ + /** @max_stack_depth: Maximum stack depth for passthrough backing files */ int max_stack_depth; - /** The number of requests waiting for completion */ - atomic_t num_waiting; - - /** Negotiated minor version */ + /** @minor: Negotiated minor version */ unsigned minor; - /** Entry on the fuse_conn_list */ + /** @entry: Entry on the fuse_conn_list */ struct list_head entry; - /** Device ID from the root super block */ + /** @dev: Device ID from the root super block */ dev_t dev; - /** Key for lock owner ID scrambling */ + /** @scramble_key: Key for lock owner ID scrambling */ u32 scramble_key[4]; - /** Version counter for attribute changes */ + /** @attr_version: Version counter for attribute changes */ atomic64_t attr_version; - /** Version counter for evict inode */ + /** @evict_ctr: Version counter for evict inode */ atomic64_t evict_ctr; - /* maximum file name length */ + /** @name_max: maximum file name length */ u32 name_max; - /** Called on final put */ + /** @release: Called on final put */ void (*release)(struct fuse_conn *); /** - * Read/write semaphore to hold when accessing the sb of any + * @killsb: Read/write semaphore to hold when accessing the sb of any * fuse_mount belonging to this connection */ struct rw_semaphore killsb; - /** List of device instances belonging to 
this connection */ - struct list_head devices; - #ifdef CONFIG_FUSE_DAX - /* Dax mode */ + /** @dax_mode: Dax mode */ enum fuse_dax_mode dax_mode; - /* Dax specific conn data, non-NULL if DAX is enabled */ + /** @dax: Dax specific conn data, non-NULL if DAX is enabled */ struct fuse_conn_dax *dax; #endif - /** List of filesystems using this connection */ + /** @mounts: List of filesystems using this connection */ struct list_head mounts; - /* New writepages go into this bucket */ + /** @curr_bucket: New writepages go into this bucket */ struct fuse_sync_bucket __rcu *curr_bucket; #ifdef CONFIG_FUSE_PASSTHROUGH - /** IDR for backing files ids */ + /** @backing_files_map: IDR for backing files ids */ struct idr backing_files_map; #endif - -#ifdef CONFIG_FUSE_IO_URING - /** uring connection information*/ - struct fuse_ring *ring; -#endif - - /** Only used if the connection opts into request timeouts */ - struct { - /* Worker for checking if any requests have timed out */ - struct delayed_work work; - - /* Request timeout (in jiffies). 
0 = no timeout */ - unsigned int req_timeout; - } timeout; }; /* @@ -1136,7 +904,7 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; -/** +/* * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, @@ -1147,14 +915,6 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode); -/** - * Send FORGET command - */ -void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, - u64 nodeid, u64 nlookup); - -struct fuse_forget_link *fuse_alloc_forget(void); - /* * Initialize READ or READDIR request */ @@ -1186,44 +946,44 @@ int fuse_finish_open(struct inode *inode, struct file *file); void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, unsigned int flags); -/** +/* * Send RELEASE or RELEASEDIR request */ void fuse_release_common(struct file *file, bool isdir); -/** +/* * Send FSYNC or FSYNCDIR request */ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, int datasync, int opcode); -/** +/* * Notify poll wakeup */ int fuse_notify_poll_wakeup(struct fuse_conn *fc, struct fuse_notify_poll_wakeup_out *outarg); -/** +/* * Initialize file operations on a regular file */ void fuse_init_file_inode(struct inode *inode, unsigned int flags); -/** +/* * Initialize inode operations on regular files and special files */ void fuse_init_common(struct inode *inode); -/** +/* * Initialize inode and file operations on a directory */ void fuse_init_dir(struct inode *inode); -/** +/* * Initialize inode operations on a symlink */ void fuse_init_symlink(struct inode *inode); -/** +/* * Change attributes of an inode */ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, @@ -1237,20 +997,10 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u32 
fuse_get_cache_mask(struct inode *inode); -/** - * Initialize the client device - */ -int fuse_dev_init(void); - -/** - * Cleanup the client device - */ -void fuse_dev_cleanup(void); - int fuse_ctl_init(void); void __exit fuse_ctl_cleanup(void); -/** +/* * Simple request sending that does request allocation and freeing */ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, @@ -1271,30 +1021,14 @@ static inline ssize_t fuse_simple_idmap_request(struct mnt_idmap *idmap, int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, gfp_t gfp_flags); - -/** - * Assign a unique id to a fuse request - */ -void fuse_request_assign_unique(struct fuse_iqueue *fiq, struct fuse_req *req); - -/** - * End a finished request - */ -void fuse_request_end(struct fuse_req *req); - -/* Abort all requests */ -void fuse_abort_conn(struct fuse_conn *fc); -void fuse_wait_aborted(struct fuse_conn *fc); - -/* Check if any requests timed out */ -void fuse_check_timeout(struct work_struct *work); +int fuse_simple_notify_reply(struct fuse_mount *fm, struct fuse_args *args, u64 unique); void fuse_dentry_tree_init(void); void fuse_dentry_tree_cleanup(void); void fuse_epoch_work(struct work_struct *work); -/** +/* * Invalidate inode attributes */ @@ -1304,6 +1038,9 @@ void fuse_epoch_work(struct work_struct *work); /* Attributes possibly changed on data and/or size modification */ #define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE) +/* Attributes possibly changed on directory modification */ +#define FUSE_STATX_MODDIR (FUSE_STATX_MODSIZE | STATX_NLINK) + void fuse_invalidate_attr(struct inode *inode); void fuse_invalidate_attr_mask(struct inode *inode, u32 mask); @@ -1317,45 +1054,26 @@ u64 fuse_time_to_jiffies(u64 sec, u32 nsec); void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o); -/** - * Acquire reference to fuse_conn - */ -struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); - -/** - * Initialize the fuse processing queue - */ -void 
fuse_pqueue_init(struct fuse_pqueue *fpq); - -/** +/* * Initialize fuse_conn */ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, - struct user_namespace *user_ns, - const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); + struct user_namespace *user_ns, struct fuse_chan *fch); -/** - * Release reference to fuse_conn - */ -void fuse_conn_put(struct fuse_conn *fc); - -struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); -struct fuse_dev *fuse_dev_alloc(void); -void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); -void fuse_dev_put(struct fuse_dev *fud); int fuse_send_init(struct fuse_mount *fm); /** - * Fill in superblock and initialize fuse connection + * fuse_fill_super_common - Fill in superblock and initialize fuse connection * @sb: partially-initialized superblock to fill in * @ctx: mount context */ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); -/* - * Remove the mount from the connection +/** + * fuse_mount_remove - Remove the mount from the connection + * @fm: fuse_mount to remove * - * Returns whether this was the last mount + * Returns: whether this was the last mount */ bool fuse_mount_remove(struct fuse_mount *fm); @@ -1373,23 +1091,25 @@ void fuse_conn_destroy(struct fuse_mount *fm); void fuse_mount_destroy(struct fuse_mount *fm); /** - * Add connection to control filesystem + * fuse_ctl_add_conn - Add connection to control filesystem + * @fc: Fuse connection to add */ int fuse_ctl_add_conn(struct fuse_conn *fc); /** - * Remove connection from control filesystem + * fuse_ctl_remove_conn - Remove connection from control filesystem + * @fc: Fuse connection to remove */ void fuse_ctl_remove_conn(struct fuse_conn *fc); -/** +/* * Is file type valid? */ int fuse_valid_type(int m); bool fuse_invalid_attr(struct fuse_attr *attr); -/** +/* * Is current process allowed to perform filesystem operation? 
*/ bool fuse_allow_current_process(struct fuse_conn *fc); @@ -1406,7 +1126,7 @@ void fuse_flush_writepages(struct inode *inode); void fuse_set_nowrite(struct inode *inode); void fuse_release_nowrite(struct inode *inode); -/** +/* * Scan all fuse_mounts belonging to fc to find the first where * ilookup5() returns a result. Return that result and the * respective fuse_mount in *fm (unless fm is NULL). @@ -1416,13 +1136,13 @@ void fuse_release_nowrite(struct inode *inode); struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, struct fuse_mount **fm); -/** +/* * File-system tells the kernel to invalidate cache for the given node id. */ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, loff_t offset, loff_t len); -/** +/* * File-system tells the kernel to invalidate parent attributes and * the dentry matching parent/name. * @@ -1444,7 +1164,7 @@ void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid); int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, bool isdir); -/** +/* * fuse_direct_io() flags */ @@ -1461,7 +1181,6 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, long fuse_ioctl_common(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); __poll_t fuse_file_poll(struct file *file, poll_table *wait); -int fuse_dev_release(struct inode *inode, struct file *file); bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); @@ -1471,8 +1190,6 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr, struct file *file); -void fuse_set_initialized(struct fuse_conn *fc); - void fuse_unlock_inode(struct inode *inode, bool locked); bool fuse_lock_inode(struct inode *inode); @@ -1494,15 +1211,6 @@ int fuse_set_acl(struct mnt_idmap *, struct dentry *dentry, /* readdir.c */ int fuse_readdir(struct file *file, struct dir_context *ctx); -/** - * Return the 
number of bytes in an arguments list - */ -unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); - -/** - * Get the next unique ID for a request - */ -u64 fuse_get_unique(struct fuse_iqueue *fiq); void fuse_free_conn(struct fuse_conn *fc); /* dax.c */ @@ -1570,8 +1278,6 @@ static inline struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, void fuse_backing_files_init(struct fuse_conn *fc); void fuse_backing_files_free(struct fuse_conn *fc); -int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); -int fuse_backing_close(struct fuse_conn *fc, int backing_id); /* passthrough.c */ static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi) diff --git a/fs/fuse/fuse_trace.h b/fs/fuse/fuse_trace.h index bbe9ddd8c716..60baa10bbcb9 100644 --- a/fs/fuse/fuse_trace.h +++ b/fs/fuse/fuse_trace.h @@ -90,7 +90,7 @@ TRACE_EVENT(fuse_request_send, ), TP_fast_assign( - __entry->connection = req->fm->fc->dev; + __entry->connection = req->chan->conn->dev; __entry->unique = req->in.h.unique; __entry->opcode = req->in.h.opcode; __entry->len = req->in.h.len; @@ -101,6 +101,28 @@ TRACE_EVENT(fuse_request_send, __print_symbolic(__entry->opcode, OPCODES), __entry->len) ); +TRACE_EVENT(fuse_request_sent, + TP_PROTO(const struct fuse_req *req), + + TP_ARGS(req), + + TP_STRUCT__entry( + __field(dev_t, connection) + __field(uint64_t, unique) + __field(enum fuse_opcode, opcode) + ), + + TP_fast_assign( + __entry->connection = req->chan->conn->dev; + __entry->unique = req->in.h.unique; + __entry->opcode = req->in.h.opcode; + ), + + TP_printk("connection %u req %llu opcode %u (%s)", + __entry->connection, __entry->unique, __entry->opcode, + __print_symbolic(__entry->opcode, OPCODES)) +); + TRACE_EVENT(fuse_request_end, TP_PROTO(const struct fuse_req *req), @@ -114,7 +136,7 @@ TRACE_EVENT(fuse_request_end, ), TP_fast_assign( - __entry->connection = req->fm->fc->dev; + __entry->connection = req->chan->conn->dev; __entry->unique = 
req->in.h.unique; __entry->len = req->out.h.len; __entry->error = req->out.h.error; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index deddfffb037f..d975073c6029 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1,14 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 /* FUSE: Filesystem in Userspace Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> - - This program can be distributed under the terms of the GNU GPL. - See the file COPYING. */ +#include "dev.h" #include "fuse_i.h" -#include "fuse_dev_i.h" -#include "dev_uring_i.h" #include <linux/dax.h> #include <linux/pagemap.h> @@ -35,14 +32,11 @@ MODULE_LICENSE("GPL"); static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); -DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq); static int set_global_limit(const char *val, const struct kernel_param *kp); unsigned int fuse_max_pages_limit = 256; /* default is no timeout */ -unsigned int fuse_default_req_timeout; -unsigned int fuse_max_req_timeout; unsigned int max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, @@ -62,9 +56,6 @@ MODULE_PARM_DESC(max_user_congthresh, #define FUSE_DEFAULT_BLKSIZE 512 -/** Maximum number of outstanding background requests */ -#define FUSE_DEFAULT_MAX_BACKGROUND 12 - /** Congestion starts at 75% of maximum */ #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) @@ -72,11 +63,6 @@ MODULE_PARM_DESC(max_user_congthresh, static struct file_system_type fuseblk_fs_type; #endif -struct fuse_forget_link *fuse_alloc_forget(void) -{ - return kzalloc_obj(struct fuse_forget_link, GFP_KERNEL_ACCOUNT); -} - static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) { struct fuse_submount_lookup *sl; @@ -150,7 +136,7 @@ static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, if (!refcount_dec_and_test(&sl->count)) return; - fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); + fuse_chan_queue_forget(fc->chan, sl->forget, sl->nodeid, 
1); sl->forget = NULL; kfree(sl); } @@ -173,8 +159,8 @@ static void fuse_evict_inode(struct inode *inode) if (FUSE_IS_DAX(inode)) fuse_dax_inode_cleanup(inode); if (fi->nlookup) { - fuse_queue_forget(fc, fi->forget, fi->nodeid, - fi->nlookup); + fuse_chan_queue_forget(fc->chan, fi->forget, fi->nodeid, + fi->nlookup); fi->forget = NULL; } @@ -624,7 +610,7 @@ static void fuse_umount_begin(struct super_block *sb) if (fc->no_force_umount) return; - fuse_abort_conn(fc); + fuse_chan_abort(fc->chan, false); // Only retire block-device-based superblocks. if (sb->s_bdev != NULL) @@ -688,11 +674,9 @@ static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) struct fuse_sync_bucket *bucket; bucket = kzalloc_obj(*bucket, GFP_KERNEL | __GFP_NOFAIL); - if (bucket) { - init_waitqueue_head(&bucket->waitq); - /* Initial active count */ - atomic_set(&bucket->count, 1); - } + init_waitqueue_head(&bucket->waitq); + /* Initial active count */ + atomic_set(&bucket->count, 1); return bucket; } @@ -816,8 +800,7 @@ static int fuse_opt_fd(struct fs_context *fsc, struct file *file) if (file->f_cred->user_ns != fsc->user_ns) return invalfc(fsc, "wrong user namespace for fuse device"); - ctx->fud = file->private_data; - refcount_inc(&ctx->fud->ref); + ctx->fud = fuse_dev_grab(file); return 0; } @@ -970,56 +953,19 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void fuse_iqueue_init(struct fuse_iqueue *fiq, - const struct fuse_iqueue_ops *ops, - void *priv) -{ - memset(fiq, 0, sizeof(struct fuse_iqueue)); - spin_lock_init(&fiq->lock); - init_waitqueue_head(&fiq->waitq); - INIT_LIST_HEAD(&fiq->pending); - INIT_LIST_HEAD(&fiq->interrupts); - fiq->forget_list_tail = &fiq->forget_list_head; - fiq->connected = 1; - fiq->ops = ops; - fiq->priv = priv; -} - -void fuse_pqueue_init(struct fuse_pqueue *fpq) -{ - unsigned int i; - - spin_lock_init(&fpq->lock); - for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) - INIT_LIST_HEAD(&fpq->processing[i]); - 
INIT_LIST_HEAD(&fpq->io); - fpq->connected = 1; -} - void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, - struct user_namespace *user_ns, - const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) + struct user_namespace *user_ns, struct fuse_chan *fch) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); - spin_lock_init(&fc->bg_lock); init_rwsem(&fc->killsb); refcount_set(&fc->count, 1); atomic_set(&fc->epoch, 1); INIT_WORK(&fc->epoch_work, fuse_epoch_work); - init_waitqueue_head(&fc->blocked_waitq); - fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); - INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); - INIT_LIST_HEAD(&fc->devices); - atomic_set(&fc->num_waiting, 0); - fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; atomic64_set(&fc->khctr, 0); fc->polled_files = RB_ROOT; - fc->blocked = 0; - fc->initialized = 0; - fc->connected = 1; atomic64_set(&fc->attr_version, 1); atomic64_set(&fc->evict_ctr, 1); get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); @@ -1028,7 +974,6 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; fc->max_pages_limit = fuse_max_pages_limit; fc->name_max = FUSE_NAME_LOW_MAX; - fc->timeout.req_timeout = 0; if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) fuse_backing_files_init(fc); @@ -1036,6 +981,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, INIT_LIST_HEAD(&fc->mounts); list_add(&fm->fc_entry, &fc->mounts); fm->fc = fc; + fuse_chan_set_fc(fch, fc); + fc->chan = fch; } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -1043,7 +990,8 @@ static void delayed_release(struct rcu_head *p) { struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); - fuse_uring_destruct(fc); + fuse_uring_destruct(fc->chan); + fuse_chan_free(fc->chan); put_user_ns(fc->user_ns); fc->release(fc); @@ -1051,7 +999,6 @@ static void delayed_release(struct rcu_head *p) void fuse_conn_put(struct fuse_conn 
*fc) { - struct fuse_iqueue *fiq = &fc->iq; struct fuse_sync_bucket *bucket; if (!refcount_dec_and_test(&fc->count)) @@ -1059,11 +1006,8 @@ void fuse_conn_put(struct fuse_conn *fc) if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc); - if (fc->timeout.req_timeout) - cancel_delayed_work_sync(&fc->timeout.work); cancel_work_sync(&fc->epoch_work); - if (fiq->ops->release) - fiq->ops->release(fiq); + fuse_chan_release(fc->chan); put_pid_ns(fc->pid_ns); bucket = rcu_dereference_protected(fc->curr_bucket, 1); if (bucket) { @@ -1083,6 +1027,11 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) } EXPORT_SYMBOL_GPL(fuse_conn_get); +dev_t fuse_conn_get_id(struct fuse_conn *fc) +{ + return fc->dev; +} + static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode) { struct fuse_attr attr; @@ -1113,12 +1062,11 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); if (!inode) { struct fuse_entry_out outarg; - const struct qstr name = QSTR_INIT(".", 1); if (!fc->export_support) goto out_err; - err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, + err = fuse_lookup_name(sb, handle->nodeid, &QSTR("."), &outarg, &inode); if (err && err != -ENOENT) goto out_err; @@ -1294,12 +1242,13 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_congthresh); - spin_lock(&fc->bg_lock); if (arg->max_background) { - fc->max_background = arg->max_background; + unsigned int max_background = arg->max_background; + + if (!cap_sys_admin && max_background > max_user_bgreq) + max_background = max_user_bgreq; - if (!cap_sys_admin && fc->max_background > max_user_bgreq) - fc->max_background = max_user_bgreq; + fuse_chan_max_background_set(fc->chan, max_background); } if (arg->congestion_threshold) { fc->congestion_threshold = arg->congestion_threshold; @@ -1308,48 +1257,20 @@ static 
void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) fc->congestion_threshold > max_user_congthresh) fc->congestion_threshold = max_user_congthresh; } - spin_unlock(&fc->bg_lock); -} - -static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) -{ - fc->timeout.req_timeout = secs_to_jiffies(timeout); - INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout); - queue_delayed_work(system_percpu_wq, &fc->timeout.work, - fuse_timeout_timer_freq); -} - -static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) -{ - if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) - return; - - if (!timeout) - timeout = fuse_default_req_timeout; - - if (fuse_max_req_timeout) { - if (timeout) - timeout = min(fuse_max_req_timeout, timeout); - else - timeout = fuse_max_req_timeout; - } - - timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout); - - set_request_timeout(fc, timeout); } struct fuse_init_args { struct fuse_args args; struct fuse_init_in in; struct fuse_init_out out; + struct fuse_mount *fm; }; -static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, - int error) +static void process_init_reply(struct fuse_args *args, int error) { - struct fuse_conn *fc = fm->fc; struct fuse_init_args *ia = container_of(args, typeof(*ia), args); + struct fuse_mount *fm = ia->fm; + struct fuse_conn *fc = fm->fc; struct fuse_init_out *arg = &ia->out; bool ok = true; @@ -1481,7 +1402,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, ok = false; } if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) - fc->io_uring = 1; + fuse_chan_io_uring_enable(fc->chan); if (flags & FUSE_REQUEST_TIMEOUT) timeout = arg->request_timeout; @@ -1491,7 +1412,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->no_flock = 1; } - init_server_timeout(fc, timeout); + fuse_init_server_timeout(fc->chan, timeout); fm->sb->s_bdi->ra_pages = min(fm->sb->s_bdi->ra_pages, 
ra_pages); @@ -1505,10 +1426,15 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, if (!ok) { fc->conn_init = 0; fc->conn_error = 1; + fuse_chan_set_initialized(fc->chan, NULL); + } else { + struct fuse_chan_param cp = { + .minor = fc->minor, + .max_write = fc->max_write, + .max_pages = fc->max_pages, + }; + fuse_chan_set_initialized(fc->chan, &cp); } - - fuse_set_initialized(fc); - wake_up_all(&fc->blocked_waitq); } static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm) @@ -1518,6 +1444,7 @@ static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm) ia = kzalloc_obj(*ia, GFP_KERNEL | __GFP_NOFAIL); + ia->fm = fm; ia->in.major = FUSE_KERNEL_VERSION; ia->in.minor = FUSE_KERNEL_MINOR_VERSION; ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; @@ -1591,7 +1518,7 @@ int fuse_send_init(struct fuse_mount *fm) if (!err) return 0; } - process_init_reply(fm, &ia->args, err); + process_init_reply(&ia->args, err); if (fm->fc->conn_error) return -ENOTCONN; return 0; @@ -1600,7 +1527,6 @@ EXPORT_SYMBOL_GPL(fuse_send_init); void fuse_free_conn(struct fuse_conn *fc) { - WARN_ON(!list_empty(&fc->devices)); kfree(fc); } EXPORT_SYMBOL_GPL(fuse_free_conn); @@ -1643,89 +1569,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) return 0; } -struct fuse_dev *fuse_dev_alloc(void) -{ - struct fuse_dev *fud; - struct list_head *pq; - - fud = kzalloc_obj(struct fuse_dev); - if (!fud) - return NULL; - - refcount_set(&fud->ref, 1); - pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE); - if (!pq) { - kfree(fud); - return NULL; - } - - fud->pq.processing = pq; - fuse_pqueue_init(&fud->pq); - - return fud; -} -EXPORT_SYMBOL_GPL(fuse_dev_alloc); - -void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) -{ - struct fuse_conn *old_fc; - - spin_lock(&fc->lock); - /* - * Pairs with: - * - xchg() in fuse_dev_release() - * - smp_load_acquire() in fuse_dev_fc_get() - */ - old_fc = cmpxchg(&fud->fc, NULL, 
fc); - if (old_fc) { - /* - * failed to set fud->fc because - * - it was already set to a different fc - * - it was set to disconneted - */ - fc->connected = 0; - } else { - list_add_tail(&fud->entry, &fc->devices); - fuse_conn_get(fc); - } - spin_unlock(&fc->lock); -} -EXPORT_SYMBOL_GPL(fuse_dev_install); - -struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) -{ - struct fuse_dev *fud; - - fud = fuse_dev_alloc(); - if (!fud) - return NULL; - - fuse_dev_install(fud, fc); - return fud; -} -EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); - -void fuse_dev_put(struct fuse_dev *fud) -{ - struct fuse_conn *fc; - - if (!refcount_dec_and_test(&fud->ref)) - return; - - fc = fuse_dev_fc_get(fud); - if (fc && fc != FUSE_DEV_FC_DISCONNECTED) { - /* This is the virtiofs case (fuse_dev_release() not called) */ - spin_lock(&fc->lock); - list_del(&fud->entry); - spin_unlock(&fc->lock); - - fuse_conn_put(fc); - } - kfree(fud->pq.processing); - kfree(fud); -} -EXPORT_SYMBOL_GPL(fuse_dev_put); - static void fuse_fill_attr_from_inode(struct fuse_attr *attr, const struct fuse_inode *fi) { @@ -1941,9 +1784,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) mutex_lock(&fuse_mutex); err = -EINVAL; if (fud) { - if (fuse_dev_fc_get(fud)) + if (fuse_dev_is_installed(fud)) goto err_unlock; - if (fud->sync_init) + if (fuse_dev_is_sync_init(fud)) fc->sync_init = 1; } @@ -1953,10 +1796,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - if (fud) { - fuse_dev_install(fud, fc); - wake_up_all(&fuse_dev_waitq); - } + if (fud) + fuse_dev_install(fud, fc->chan); + mutex_unlock(&fuse_mutex); return 0; @@ -2001,9 +1843,7 @@ static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc) static int fuse_test_super(struct super_block *sb, struct fs_context *fsc) { - struct fuse_dev *fud = fsc->sget_key; - - return fuse_dev_fc_get(fud) == 
get_fuse_conn_super(sb); + return fuse_dev_verify(fsc->sget_key, get_fuse_conn_super(sb)->chan); } static int fuse_get_tree(struct fs_context *fsc) @@ -2012,8 +1852,12 @@ static int fuse_get_tree(struct fs_context *fsc) struct fuse_conn *fc; struct fuse_mount *fm; struct super_block *sb; + struct fuse_chan *fch __free(fuse_chan_free) = fuse_dev_chan_new(); int err; + if (!fch) + return -ENOMEM; + fc = kmalloc_obj(*fc); if (!fc) return -ENOMEM; @@ -2024,7 +1868,7 @@ static int fuse_get_tree(struct fs_context *fsc) return -ENOMEM; } - fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL); + fuse_conn_init(fc, fm, fsc->user_ns, no_free_ptr(fch)); fc->release = fuse_free_conn; fsc->s_fs_info = fm; @@ -2045,7 +1889,7 @@ static int fuse_get_tree(struct fs_context *fsc) * Allow creating a fuse mount with an already initialized fuse * connection */ - if (fuse_dev_fc_get(ctx->fud)) { + if (fuse_dev_is_installed(ctx->fud)) { fsc->sget_key = ctx->fud; sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); err = PTR_ERR_OR_ZERO(sb); @@ -2116,8 +1960,8 @@ void fuse_conn_destroy(struct fuse_mount *fm) if (fc->destroy) fuse_send_destroy(fm); - fuse_abort_conn(fc); - fuse_wait_aborted(fc); + fuse_chan_abort(fc->chan, false); + fuse_chan_wait_aborted(fc->chan); if (!list_empty(&fc->entry)) { mutex_lock(&fuse_mutex); diff --git a/fs/fuse/notify.c b/fs/fuse/notify.c new file mode 100644 index 000000000000..29578104ae6c --- /dev/null +++ b/fs/fuse/notify.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "fuse_i.h" +#include <linux/pagemap.h> + +static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_poll_wakeup_out outarg; + int err; + + if (size != sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + fuse_copy_finish(cs); + return fuse_notify_poll_wakeup(fc, &outarg); +} + +static int 
fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err; + + if (size != sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = fuse_reverse_inval_inode(fc, outarg.ino, + outarg.off, outarg.len); + up_read(&fc->killsb); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err; + char *buf; + struct qstr name; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (outarg.namelen > fc->name_max) + return -ENAMETOOLONG; + + err = -EINVAL; + if (size != sizeof(outarg) + outarg.namelen + 1) + return -EINVAL; + + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + + down_read(&fc->killsb); + err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags); + up_read(&fc->killsb); +err: + kfree(buf); + return err; +} + +static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_delete_out outarg; + int err; + char *buf; + struct qstr name; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (outarg.namelen > fc->name_max) + return -ENAMETOOLONG; + + if (size != sizeof(outarg) + outarg.namelen + 1) + return -EINVAL; + + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if 
(err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + + down_read(&fc->killsb); + err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0); + up_read(&fc->killsb); +err: + kfree(buf); + return err; +} + +static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_store_out outarg; + struct inode *inode; + struct address_space *mapping; + u64 nodeid; + int err; + unsigned int num; + loff_t file_size; + loff_t pos; + loff_t end; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (size - sizeof(outarg) != outarg.size) + return -EINVAL; + + if (outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; + + nodeid = outarg.nodeid; + pos = outarg.offset; + num = min(outarg.size, MAX_LFS_FILESIZE - pos); + + down_read(&fc->killsb); + + err = -ENOENT; + inode = fuse_ilookup(fc, nodeid, NULL); + if (!inode) + goto out_up_killsb; + if (!S_ISREG(inode->i_mode)) { + err = -EINVAL; + goto out_iput; + } + + mapping = inode->i_mapping; + file_size = i_size_read(inode); + end = pos + num; + if (end > file_size) { + file_size = end; + fuse_write_update_attr(inode, file_size, num); + } + + while (num) { + struct folio *folio; + unsigned int folio_offset; + unsigned int nr_bytes; + pgoff_t index = pos >> PAGE_SHIFT; + + folio = filemap_grab_folio(mapping, index); + err = PTR_ERR(folio); + if (IS_ERR(folio)) + goto out_iput; + + folio_offset = offset_in_folio(folio, pos); + nr_bytes = min(num, folio_size(folio) - folio_offset); + + err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0); + if (!folio_test_uptodate(folio) && !err && folio_offset == 0 && + (nr_bytes == folio_size(folio) || file_size == end)) { + folio_zero_segment(folio, nr_bytes, folio_size(folio)); + folio_mark_uptodate(folio); + } + folio_unlock(folio); + folio_put(folio); + + if (err) + goto out_iput; + + pos += nr_bytes; + num -= 
nr_bytes; + } + + err = 0; + +out_iput: + iput(inode); +out_up_killsb: + up_read(&fc->killsb); + return err; +} + +struct fuse_retrieve_args { + struct fuse_args_pages ap; + struct fuse_notify_retrieve_in inarg; +}; + +static void fuse_retrieve_end(struct fuse_args *args, int error) +{ + struct fuse_retrieve_args *ra = + container_of(args, typeof(*ra), ap.args); + + release_pages(ra->ap.folios, ra->ap.num_folios); + kfree(ra); +} + +static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, + struct fuse_notify_retrieve_out *outarg) +{ + int err; + struct address_space *mapping = inode->i_mapping; + loff_t file_size; + unsigned int num; + unsigned int offset; + size_t total_len = 0; + unsigned int num_pages; + struct fuse_conn *fc = fm->fc; + struct fuse_retrieve_args *ra; + size_t args_size = sizeof(*ra); + struct fuse_args_pages *ap; + struct fuse_args *args; + loff_t pos = outarg->offset; + + offset = offset_in_page(pos); + file_size = i_size_read(inode); + + num = min(outarg->size, fc->max_write); + if (pos > file_size) + num = 0; + else if (num > file_size - pos) + num = file_size - pos; + + num_pages = DIV_ROUND_UP(num + offset, PAGE_SIZE); + num_pages = min(num_pages, fc->max_pages); + num = min(num, num_pages << PAGE_SHIFT); + + args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0])); + + ra = kzalloc(args_size, GFP_KERNEL); + if (!ra) + return -ENOMEM; + + ap = &ra->ap; + ap->folios = (void *) (ra + 1); + ap->descs = (void *) (ap->folios + num_pages); + + args = &ap->args; + args->nodeid = outarg->nodeid; + args->opcode = FUSE_NOTIFY_REPLY; + args->in_numargs = 3; + args->in_pages = true; + args->end = fuse_retrieve_end; + + while (num && ap->num_folios < num_pages) { + struct folio *folio; + unsigned int folio_offset; + unsigned int nr_bytes; + pgoff_t index = pos >> PAGE_SHIFT; + + folio = filemap_get_folio(mapping, index); + if (IS_ERR(folio)) + break; + if (!folio_test_uptodate(folio)) { + folio_put(folio); + break; + } + + 
folio_offset = offset_in_folio(folio, pos); + nr_bytes = min(folio_size(folio) - folio_offset, num); + + ap->folios[ap->num_folios] = folio; + ap->descs[ap->num_folios].offset = folio_offset; + ap->descs[ap->num_folios].length = nr_bytes; + ap->num_folios++; + + pos += nr_bytes; + num -= nr_bytes; + total_len += nr_bytes; + } + ra->inarg.offset = outarg->offset; + ra->inarg.size = total_len; + fuse_set_zero_arg0(args); + args->in_args[1].size = sizeof(ra->inarg); + args->in_args[1].value = &ra->inarg; + args->in_args[2].size = total_len; + + err = fuse_simple_notify_reply(fm, args, outarg->notify_unique); + if (err) + fuse_retrieve_end(args, err); + + return err; +} + +static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_retrieve_out outarg; + struct fuse_mount *fm; + struct inode *inode; + u64 nodeid; + int err; + + if (size != sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + fuse_copy_finish(cs); + + if (outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; + + down_read(&fc->killsb); + err = -ENOENT; + nodeid = outarg.nodeid; + + inode = fuse_ilookup(fc, nodeid, &fm); + if (inode) { + err = -EINVAL; + if (S_ISREG(inode->i_mode)) + err = fuse_retrieve(fm, inode, &outarg); + iput(inode); + } + up_read(&fc->killsb); + + return err; +} + +static int fuse_notify_resend(struct fuse_conn *fc) +{ + fuse_chan_resend(fc->chan); + return 0; +} + +/* + * Increments the fuse connection epoch. This will cause dentries and + * readdir caches from previous epochs to be invalidated. Additionally, + * if inval_wq is set, a work queue is scheduled to trigger the invalidation. 
+ */ +static int fuse_notify_inc_epoch(struct fuse_conn *fc) +{ + atomic_inc(&fc->epoch); + if (inval_wq) + schedule_work(&fc->epoch_work); + + return 0; +} + +static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_prune_out outarg; + const unsigned int batch = 512; + u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL); + unsigned int num, i; + int err; + + if (!nodeids) + return -ENOMEM; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (size - sizeof(outarg) != array_size(outarg.count, sizeof(u64))) + return -EINVAL; + + for (; outarg.count; outarg.count -= num) { + num = min(batch, outarg.count); + err = fuse_copy_one(cs, nodeids, num * sizeof(u64)); + if (err) + return err; + + scoped_guard(rwsem_read, &fc->killsb) { + for (i = 0; i < num; i++) + fuse_try_prune_one_inode(fc, nodeids[i]); + } + } + return 0; +} + +int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, + unsigned int size, struct fuse_copy_state *cs) +{ + switch (code) { + case FUSE_NOTIFY_POLL: + return fuse_notify_poll(fc, size, cs); + + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + + case FUSE_NOTIFY_STORE: + return fuse_notify_store(fc, size, cs); + + case FUSE_NOTIFY_RETRIEVE: + return fuse_notify_retrieve(fc, size, cs); + + case FUSE_NOTIFY_DELETE: + return fuse_notify_delete(fc, size, cs); + + case FUSE_NOTIFY_RESEND: + return fuse_notify_resend(fc); + + case FUSE_NOTIFY_INC_EPOCH: + return fuse_notify_inc_epoch(fc); + + case FUSE_NOTIFY_PRUNE: + return fuse_notify_prune(fc, size, cs); + + default: + return -EINVAL; + } +} diff --git a/fs/fuse/poll.c b/fs/fuse/poll.c new file mode 100644 index 000000000000..bce3ee2e861e --- /dev/null +++ b/fs/fuse/poll.c @@ -0,0 +1,141 @@ +// 
SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "fuse_i.h" + +void fuse_end_polls(struct fuse_conn *fc) +{ + struct rb_node *p; + + spin_lock(&fc->lock); + p = rb_first(&fc->polled_files); + + while (p) { + struct fuse_file *ff; + ff = rb_entry(p, struct fuse_file, polled_node); + wake_up_interruptible_all(&ff->poll_wait); + + p = rb_next(p); + } + spin_unlock(&fc->lock); +} + +/* + * All files which have been polled are linked to RB tree + * fuse_conn->polled_files which is indexed by kh. Walk the tree and + * find the matching one. + */ +static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, + struct rb_node **parent_out) +{ + struct rb_node **link = &fc->polled_files.rb_node; + struct rb_node *last = NULL; + + while (*link) { + struct fuse_file *ff; + + last = *link; + ff = rb_entry(last, struct fuse_file, polled_node); + + if (kh < ff->kh) + link = &last->rb_left; + else if (kh > ff->kh) + link = &last->rb_right; + else + return link; + } + + if (parent_out) + *parent_out = last; + return link; +} + +/* + * The file is about to be polled. Make sure it's on the polled_files + * RB tree. Note that files once added to the polled_files tree are + * not removed before the file is released. This is because a file + * polled once is likely to be polled again. 
+ */ +static void fuse_register_polled_file(struct fuse_conn *fc, + struct fuse_file *ff) +{ + spin_lock(&fc->lock); + if (RB_EMPTY_NODE(&ff->polled_node)) { + struct rb_node **link, *parent; + + link = fuse_find_polled_node(fc, ff->kh, &parent); + BUG_ON(*link); + rb_link_node(&ff->polled_node, parent, link); + rb_insert_color(&ff->polled_node, &fc->polled_files); + } + spin_unlock(&fc->lock); +} + +__poll_t fuse_file_poll(struct file *file, poll_table *wait) +{ + struct fuse_file *ff = file->private_data; + struct fuse_mount *fm = ff->fm; + struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; + struct fuse_poll_out outarg; + FUSE_ARGS(args); + int err; + + if (fm->fc->no_poll) + return DEFAULT_POLLMASK; + + poll_wait(file, &ff->poll_wait, wait); + inarg.events = mangle_poll(poll_requested_events(wait)); + + /* + * Ask for notification iff there's someone waiting for it. + * The client may ignore the flag and always notify. + */ + if (waitqueue_active(&ff->poll_wait)) { + inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; + fuse_register_polled_file(fm->fc, ff); + } + + args.opcode = FUSE_POLL; + args.nodeid = ff->nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; + err = fuse_simple_request(fm, &args); + + if (!err) + return demangle_poll(outarg.revents); + if (err == -ENOSYS) { + fm->fc->no_poll = 1; + return DEFAULT_POLLMASK; + } + return EPOLLERR; +} +EXPORT_SYMBOL_GPL(fuse_file_poll); + +/* + * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and + * wakes up the poll waiters. 
+ */ +int fuse_notify_poll_wakeup(struct fuse_conn *fc, + struct fuse_notify_poll_wakeup_out *outarg) +{ + u64 kh = outarg->kh; + struct rb_node **link; + + spin_lock(&fc->lock); + + link = fuse_find_polled_node(fc, kh, NULL); + if (*link) { + struct fuse_file *ff; + + ff = rb_entry(*link, struct fuse_file, polled_node); + wake_up_interruptible_sync(&ff->poll_wait); + } + + spin_unlock(&fc->lock); + return 0; +} + diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index a2361f1d9905..0e1321491747 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* FUSE: Filesystem in Userspace Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu> - - This program can be distributed under the terms of the GNU GPL. - See the file COPYING. */ @@ -12,6 +10,7 @@ #include <linux/posix_acl.h> #include <linux/pagemap.h> #include <linux/highmem.h> +#include <linux/vmalloc.h> static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) { @@ -334,6 +333,43 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, return 0; } +static struct page **fuse_readdir_alloc_buf(struct fuse_args_pages *ap, size_t *bufsize) +{ + unsigned int i, nr_alloc, nr_pages = DIV_ROUND_UP(*bufsize, PAGE_SIZE); + struct page **pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); + + if (!pages) + return NULL; + + nr_alloc = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); + if (!nr_alloc) + goto free_array; + + if (nr_alloc < nr_pages) { + nr_pages = nr_alloc; + *bufsize = (size_t) nr_pages << PAGE_SHIFT; + } + + ap->folios = fuse_folios_alloc(nr_pages, GFP_KERNEL, &ap->descs); + if (!ap->folios) + goto release_pages; + + for (i = 0; i < nr_pages; i++) { + ap->folios[i] = page_folio(pages[i]); + ap->descs[i].length = min_t(size_t, *bufsize - (size_t)i * PAGE_SIZE, PAGE_SIZE); + } + ap->num_folios = nr_pages; + ap->args.out_pages = true; + + return pages; + +release_pages: + release_pages(pages, nr_pages); +free_array: 
+ kfree(pages); + return NULL; +} + static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) { int plus; @@ -342,18 +378,16 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_conn *fc = fm->fc; struct fuse_io_args ia = {}; - struct fuse_args *args = &ia.ap.args; + struct fuse_args_pages *ap = &ia.ap; void *buf; size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT); u64 attr_version = 0, evict_ctr = 0; bool locked; + struct page **pages = fuse_readdir_alloc_buf(ap, &bufsize); - buf = kvmalloc(bufsize, GFP_KERNEL); - if (!buf) + if (!pages) return -ENOMEM; - args->out_args[0].value = buf; - plus = fuse_use_readdirplus(inode, ctx); if (plus) { attr_version = fuse_get_attr_version(fm->fc); @@ -363,24 +397,37 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR); } locked = fuse_lock_inode(inode); - res = fuse_simple_request(fm, args); + res = fuse_simple_request(fm, &ap->args); fuse_unlock_inode(inode, locked); - if (res >= 0) { - if (!res) { - struct fuse_file *ff = file->private_data; - - if (ff->open_flags & FOPEN_CACHE_DIR) - fuse_readdir_cache_end(file, ctx->pos); - } else if (plus) { - res = parse_dirplusfile(buf, res, file, ctx, attr_version, - evict_ctr); - } else { + if (res < 0) + goto out; + + if (!res) { + struct fuse_file *ff = file->private_data; + + if (ff->open_flags & FOPEN_CACHE_DIR) + fuse_readdir_cache_end(file, ctx->pos); + goto out; + } + + buf = vm_map_ram(pages, ap->num_folios, -1); + if (!buf) { + res = -ENOMEM; + } else { + if (plus) + res = parse_dirplusfile(buf, res, file, ctx, attr_version, evict_ctr); + else res = parse_dirfile(buf, res, file, ctx); - } + + vm_unmap_ram(buf, ap->num_folios); } +out: + kfree(ap->folios); + release_pages(pages, ap->num_folios); + kfree(pages); - kvfree(buf); 
fuse_invalidate_atime(inode); + return res; } @@ -441,6 +488,7 @@ static void fuse_rdc_reset(struct inode *inode) fi->rdc.version++; fi->rdc.size = 0; fi->rdc.pos = 0; + fi->rdc.epoch = 0; } #define UNCACHED 1 @@ -482,6 +530,7 @@ retry_locked: if (!ctx->pos && !fi->rdc.size) { fi->rdc.mtime = inode_get_mtime(inode); fi->rdc.iversion = inode_query_iversion(inode); + fi->rdc.epoch = atomic_read(&fc->epoch); } spin_unlock(&fi->rdc.lock); return UNCACHED; @@ -495,7 +544,8 @@ retry_locked: struct timespec64 mtime = inode_get_mtime(inode); if (inode_peek_iversion(inode) != fi->rdc.iversion || - !timespec64_equal(&fi->rdc.mtime, &mtime)) { + !timespec64_equal(&fi->rdc.mtime, &mtime) || + fi->rdc.epoch != atomic_read(&fc->epoch)) { fuse_rdc_reset(inode); goto retry_locked; } diff --git a/fs/fuse/req.c b/fs/fuse/req.c new file mode 100644 index 000000000000..a01ee743d31e --- /dev/null +++ b/fs/fuse/req.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "fuse_i.h" + +static int fuse_fill_creds(struct fuse_mount *fm, struct fuse_args *args, struct mnt_idmap *idmap) +{ + struct fuse_conn *fc = fm->fc; + bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP); + kuid_t fsuid = mapped_fsuid(idmap, fc->user_ns); + kgid_t fsgid = mapped_fsgid(idmap, fc->user_ns); + + args->pid = pid_nr_ns(task_pid(current), fc->pid_ns); + + if (args->force) { + if (args->nocreds) + return 0; + + if (no_idmap) { + args->uid = from_kuid_munged(fc->user_ns, current_fsuid()); + args->gid = from_kgid_munged(fc->user_ns, current_fsgid()); + } else { + args->uid = FUSE_INVALID_UIDGID; + args->gid = FUSE_INVALID_UIDGID; + } + return 0; + } + + WARN_ON(args->nocreds); + /* + * Keep the old behavior when idmappings support was not + * declared by a FUSE server. 
+ * + * For those FUSE servers who support idmapped mounts, we send UID/GID + * only along with "inode creation" fuse requests, otherwise idmap == + * &invalid_mnt_idmap and req->in.h.{u,g}id will be equal to + * FUSE_INVALID_UIDGID. + */ + if (no_idmap) { + fsuid = current_fsuid(); + fsgid = current_fsgid(); + } + args->uid = from_kuid(fc->user_ns, fsuid); + args->gid = from_kgid(fc->user_ns, fsgid); + + if (no_idmap && unlikely(args->uid == ((uid_t)-1) || args->gid == ((gid_t)-1))) + return -EOVERFLOW; + + return 0; +} + +static int fuse_req_prep(struct fuse_mount *fm, struct fuse_args *args, struct mnt_idmap *idmap) +{ + if (!args->force && fm->fc->conn_error) + return -ECONNREFUSED; + + return fuse_fill_creds(fm, args, idmap); +} + +ssize_t __fuse_simple_request(struct mnt_idmap *idmap, struct fuse_mount *fm, + struct fuse_args *args) +{ + struct fuse_conn *fc = fm->fc; + int err = fuse_req_prep(fm, args, idmap); + + if (err) + return err; + + return fuse_chan_send(fc->chan, args); +} + +int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, gfp_t gfp_flags) +{ + struct fuse_conn *fc = fm->fc; + int err; + + WARN_ON(args->force && !args->nocreds); + + err = fuse_req_prep(fm, args, &invalid_mnt_idmap); + if (err) + return err; + + return fuse_chan_send_bg(fc->chan, args, gfp_flags); +} +EXPORT_SYMBOL_GPL(fuse_simple_background); + +int fuse_simple_notify_reply(struct fuse_mount *fm, struct fuse_args *args, u64 unique) +{ + struct fuse_conn *fc = fm->fc; + int err; + + WARN_ON(args->force && !args->nocreds); + + err = fuse_req_prep(fm, args, &invalid_mnt_idmap); + if (err) + return err; + + return fuse_chan_send_notify_reply(fc->chan, args, unique); +} diff --git a/fs/fuse/req_timeout.c b/fs/fuse/req_timeout.c new file mode 100644 index 000000000000..6cc6fc491343 --- /dev/null +++ b/fs/fuse/req_timeout.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "sysctl.h" +#include "fuse_dev_i.h" +#include 
"dev_uring_i.h" + +/* Frequency (in seconds) of request timeout checks, if opted into */ +#define FUSE_TIMEOUT_TIMER_FREQ 15 + +/* Frequency (in jiffies) of request timeout checks, if opted into */ +static const unsigned long fuse_timeout_timer_freq = + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ); + +/* + * Default timeout (in seconds) for the server to reply to a request + * before the connection is aborted, if no timeout was specified on mount. + * + * Exported via sysctl + */ +unsigned int fuse_default_req_timeout; + +/* + * Max timeout (in seconds) for the server to reply to a request before + * the connection is aborted. + * + * Exported via sysctl + */ +unsigned int fuse_max_req_timeout; + +bool fuse_request_expired(struct fuse_chan *fch, struct list_head *list) +{ + struct fuse_req *req; + + req = list_first_entry_or_null(list, struct fuse_req, list); + if (!req) + return false; + return time_is_before_jiffies(req->create_time + fch->timeout.req_timeout); +} + +static bool fuse_fpq_processing_expired(struct fuse_chan *fch, struct list_head *processing) +{ + int i; + + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) + if (fuse_request_expired(fch, &processing[i])) + return true; + + return false; +} + +/* + * Check if any requests aren't being completed by the time the request timeout + * elapses. To do so, we: + * - check the fiq pending list + * - check the bg queue + * - check the fpq io and processing lists + * + * To make this fast, we only check against the head request on each list since + * these are generally queued in order of creation time (eg newer requests get + * queued to the tail). We might miss a few edge cases (eg requests transitioning + * between lists, re-sent requests at the head of the pending list having a + * later creation time than other requests on that list, etc.) but that is fine + * since if the request never gets fulfilled, it will eventually be caught. 
+ */ +static void fuse_check_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct fuse_chan *fch = container_of(dwork, struct fuse_chan, timeout.work); + struct fuse_iqueue *fiq = &fch->iq; + struct fuse_dev *fud; + struct fuse_pqueue *fpq; + bool expired = false; + + if (!atomic_read(&fch->num_waiting)) + goto out; + + spin_lock(&fiq->lock); + expired = fuse_request_expired(fch, &fiq->pending); + spin_unlock(&fiq->lock); + if (expired) + goto chan_abort; + + spin_lock(&fch->bg_lock); + expired = fuse_request_expired(fch, &fch->bg_queue); + spin_unlock(&fch->bg_lock); + if (expired) + goto chan_abort; + + spin_lock(&fch->lock); + if (!fch->connected) { + spin_unlock(&fch->lock); + return; + } + list_for_each_entry(fud, &fch->devices, entry) { + fpq = &fud->pq; + spin_lock(&fpq->lock); + if (fuse_request_expired(fch, &fpq->io) || + fuse_fpq_processing_expired(fch, fpq->processing)) { + spin_unlock(&fpq->lock); + spin_unlock(&fch->lock); + goto chan_abort; + } + + spin_unlock(&fpq->lock); + } + spin_unlock(&fch->lock); + + if (fuse_uring_request_expired(fch)) + goto chan_abort; + +out: + queue_delayed_work(system_percpu_wq, &fch->timeout.work, + fuse_timeout_timer_freq); + return; + +chan_abort: + fuse_chan_abort(fch, false); +} + +static void set_request_timeout(struct fuse_chan *fch, unsigned int timeout) +{ + fch->timeout.req_timeout = secs_to_jiffies(timeout); + INIT_DELAYED_WORK(&fch->timeout.work, fuse_check_timeout); + queue_delayed_work(system_percpu_wq, &fch->timeout.work, + fuse_timeout_timer_freq); +} + +void fuse_init_server_timeout(struct fuse_chan *fch, unsigned int timeout) +{ + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) + return; + + if (!timeout) + timeout = fuse_default_req_timeout; + + if (fuse_max_req_timeout) { + if (timeout) + timeout = min(fuse_max_req_timeout, timeout); + else + timeout = fuse_max_req_timeout; + } + + timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout); + + 
set_request_timeout(fch, timeout); +} + diff --git a/fs/fuse/sysctl.c b/fs/fuse/sysctl.c index e2d921abcb88..74eca5ce9a2c 100644 --- a/fs/fuse/sysctl.c +++ b/fs/fuse/sysctl.c @@ -6,6 +6,7 @@ */ #include <linux/sysctl.h> +#include "sysctl.h" #include "fuse_i.h" static struct ctl_table_header *fuse_table_header; diff --git a/fs/fuse/sysctl.h b/fs/fuse/sysctl.h new file mode 100644 index 000000000000..948d88417133 --- /dev/null +++ b/fs/fuse/sysctl.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FS_FUSE_SYSCTL_H +#define _FS_FUSE_SYSCTL_H + +extern unsigned int fuse_default_req_timeout; +extern unsigned int fuse_max_req_timeout; + +#endif /* _FS_FUSE_SYSCTL_H */ diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 12300651a0f1..df25d4faca41 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -19,6 +19,7 @@ #include <linux/highmem.h> #include <linux/cleanup.h> #include <linux/uio.h> +#include "dev.h" #include "fuse_i.h" #include "fuse_dev_i.h" @@ -1009,7 +1010,9 @@ out: kfree(vqs); if (ret) { kfree(fs->vqs); + fs->vqs = NULL; kfree(fs->mq_map); + fs->mq_map = NULL; } return ret; } @@ -1519,7 +1522,7 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req) if (ret == -ENOSPC) { /* * Virtqueue full. Retry submission from worker - * context as we might be holding fc->bg_lock. + * context as we might be holding fc->chan->bg_lock. 
*/ spin_lock(&fsvq->lock); list_add_tail(&req->list, &fsvq->queued_reqs); @@ -1562,7 +1565,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_mount *fm = get_fuse_mount_super(sb); struct fuse_conn *fc = fm->fc; - struct virtio_fs *fs = fc->iq.priv; + struct virtio_fs *fs = fc->chan->iq.priv; struct fuse_fs_context *ctx = fsc->fs_private; unsigned int i; int err; @@ -1606,7 +1609,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; - fuse_dev_install(fsvq->fud, fc); + fuse_dev_install(fsvq->fud, fc->chan); } /* Previous unmount will stop all queues. Start these again */ @@ -1625,7 +1628,7 @@ err: static void virtio_fs_conn_destroy(struct fuse_mount *fm) { struct fuse_conn *fc = fm->fc; - struct virtio_fs *vfs = fc->iq.priv; + struct virtio_fs *vfs = fc->chan->iq.priv; struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; /* Stop dax worker. Soon evict_inodes() will be called which @@ -1673,7 +1676,7 @@ static int virtio_fs_test_super(struct super_block *sb, struct fuse_mount *fsc_fm = fsc->s_fs_info; struct fuse_mount *sb_fm = get_fuse_mount_super(sb); - return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; + return fsc_fm->fc->chan->iq.priv == sb_fm->fc->chan->iq.priv; } static int virtio_fs_get_tree(struct fs_context *fsc) @@ -1683,13 +1686,17 @@ static int virtio_fs_get_tree(struct fs_context *fsc) struct fuse_conn *fc = NULL; struct fuse_mount *fm; unsigned int virtqueue_size; + struct fuse_chan *fch __free(fuse_chan_free) = fuse_chan_new(); int err = -EIO; + if (!fch) + return -ENOMEM; + if (!fsc->source) return invalf(fsc, "No source specified"); /* This gets a reference on virtio_fs object. This ptr gets installed - * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() + * in chan->iq->priv. Once fuse_conn is going away, it calls ->put() * to drop the reference to this object. 
*/ fs = virtio_fs_find_instance(fsc->source); @@ -1711,7 +1718,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc) if (!fm) goto out_err; - fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); + fuse_iqueue_init(&fch->iq, &virtio_fs_fiq_ops, fs); + fuse_conn_init(fc, fm, fsc->user_ns, no_free_ptr(fch)); + fc->release = fuse_free_conn; fc->delete_stale = true; fc->auto_submounts = true; diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 93dfb06b6cea..cab2685acc65 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * FUSE: Filesystem in Userspace * Copyright (C) 2001-2016 Miklos Szeredi <miklos@szeredi.hu> - * - * This program can be distributed under the terms of the GNU GPL. - * See the file COPYING. */ #include "fuse_i.h" diff --git a/fs/internal.h b/fs/internal.h index d77578d66d42..355d93f92208 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -138,7 +138,6 @@ extern bool super_trylock_shared(struct super_block *sb); struct super_block *user_get_super(dev_t, bool excl); void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); -int sb_init_dio_done_wq(struct super_block *sb); /* * Prepare superblock for changing its read-only state (i.e., either remount diff --git a/include/linux/fs/super.h b/include/linux/fs/super.h index f21ffbb6dea5..405612678115 100644 --- a/include/linux/fs/super.h +++ b/include/linux/fs/super.h @@ -235,4 +235,6 @@ int freeze_super(struct super_block *super, enum freeze_holder who, int thaw_super(struct super_block *super, enum freeze_holder who, const void *freeze_owner); +int sb_init_dio_done_wq(struct super_block *sb); + #endif /* _LINUX_FS_SUPER_H */ |
