diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-08-15 13:14:14 +0300 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-08-15 13:14:14 +0300 |
commit | cd5b27c9e334ff91ec39647957a3bdf75f171938 (patch) | |
tree | 88a95929dde4232e34c61206d24f29ee79d7b34c /drivers/vhost | |
parent | 7b9069cd1859ed8d1c5c757ca21ec787f75f0029 (diff) | |
parent | 880e4ff5d6c8dc6b660f163a0e9b68b898cc6310 (diff) | |
download | linux-rolling-lts.tar.xz |
Merge v6.12.42 (linux-rolling-lts)
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/Kconfig | 18 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 4 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 244 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 22 |
4 files changed, 267 insertions, 21 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index b455d9ab6f3d..a4730217bfb6 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -94,4 +94,22 @@ config VHOST_CROSS_ENDIAN_LEGACY If unsure, say "N". +config VHOST_ENABLE_FORK_OWNER_CONTROL + bool "Enable VHOST_ENABLE_FORK_OWNER_CONTROL" + default y + help + This option enables two IOCTLs: VHOST_SET_FORK_FROM_OWNER and + VHOST_GET_FORK_FROM_OWNER. These allow userspace applications + to modify the vhost worker mode for vhost devices. + + Also expose module parameter 'fork_from_owner_default' to allow users + to configure the default mode for vhost workers. + + By default, `VHOST_ENABLE_FORK_OWNER_CONTROL` is set to `y`, + users can change the worker thread mode as needed. + If this config is disabled (n),the related IOCTLs and parameters will + be unavailable. + + If unsure, say "Y". + endif diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 38d243d914d0..88f213d1106f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1088,10 +1088,8 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc, /* validated at handler entry */ vs_tpg = vhost_vq_get_backend(vq); tpg = READ_ONCE(vs_tpg[*vc->target]); - if (unlikely(!tpg)) { - vq_err(vq, "Target 0x%x does not exist\n", *vc->target); + if (unlikely(!tpg)) goto out; - } } if (tpgp) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 63612faeab72..79b0b7cd2860 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -22,6 +22,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/kthread.h> +#include <linux/cgroup.h> #include <linux/module.h> #include <linux/sort.h> #include <linux/sched/mm.h> @@ -41,6 +42,13 @@ static int max_iotlb_entries = 2048; module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries. 
(default: 2048)"); +static bool fork_from_owner_default = VHOST_FORK_OWNER_TASK; + +#ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL +module_param(fork_from_owner_default, bool, 0444); +MODULE_PARM_DESC(fork_from_owner_default, + "Set task mode as the default(default: Y)"); +#endif enum { VHOST_MEMORY_F_LOG = 0x1, @@ -242,7 +250,7 @@ static void vhost_worker_queue(struct vhost_worker *worker, * test_and_set_bit() implies a memory barrier. */ llist_add(&work->node, &worker->work_list); - vhost_task_wake(worker->vtsk); + worker->ops->wakeup(worker); } } @@ -388,6 +396,44 @@ static void vhost_vq_reset(struct vhost_dev *dev, __vhost_vq_meta_reset(vq); } +static int vhost_run_work_kthread_list(void *data) +{ + struct vhost_worker *worker = data; + struct vhost_work *work, *work_next; + struct vhost_dev *dev = worker->dev; + struct llist_node *node; + + kthread_use_mm(dev->mm); + + for (;;) { + /* mb paired w/ kthread_stop */ + set_current_state(TASK_INTERRUPTIBLE); + + if (kthread_should_stop()) { + __set_current_state(TASK_RUNNING); + break; + } + node = llist_del_all(&worker->work_list); + if (!node) + schedule(); + + node = llist_reverse_order(node); + /* make sure flag is seen after deletion */ + smp_wmb(); + llist_for_each_entry_safe(work, work_next, node, node) { + clear_bit(VHOST_WORK_QUEUED, &work->flags); + __set_current_state(TASK_RUNNING); + kcov_remote_start_common(worker->kcov_handle); + work->fn(work); + kcov_remote_stop(); + cond_resched(); + } + } + kthread_unuse_mm(dev->mm); + + return 0; +} + static bool vhost_run_work_list(void *data) { struct vhost_worker *worker = data; @@ -552,6 +598,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->byte_weight = byte_weight; dev->use_worker = use_worker; dev->msg_handler = msg_handler; + dev->fork_owner = fork_from_owner_default; init_waitqueue_head(&dev->wait); INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); @@ -581,6 +628,46 @@ long vhost_dev_check_owner(struct vhost_dev *dev) } 
EXPORT_SYMBOL_GPL(vhost_dev_check_owner); +struct vhost_attach_cgroups_struct { + struct vhost_work work; + struct task_struct *owner; + int ret; +}; + +static void vhost_attach_cgroups_work(struct vhost_work *work) +{ + struct vhost_attach_cgroups_struct *s; + + s = container_of(work, struct vhost_attach_cgroups_struct, work); + s->ret = cgroup_attach_task_all(s->owner, current); +} + +static int vhost_attach_task_to_cgroups(struct vhost_worker *worker) +{ + struct vhost_attach_cgroups_struct attach; + int saved_cnt; + + attach.owner = current; + + vhost_work_init(&attach.work, vhost_attach_cgroups_work); + vhost_worker_queue(worker, &attach.work); + + mutex_lock(&worker->mutex); + + /* + * Bypass attachment_cnt check in __vhost_worker_flush: + * Temporarily change it to INT_MAX to bypass the check + */ + saved_cnt = worker->attachment_cnt; + worker->attachment_cnt = INT_MAX; + __vhost_worker_flush(worker); + worker->attachment_cnt = saved_cnt; + + mutex_unlock(&worker->mutex); + + return attach.ret; +} + /* Caller should have device mutex */ bool vhost_dev_has_owner(struct vhost_dev *dev) { @@ -626,7 +713,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev, WARN_ON(!llist_empty(&worker->work_list)); xa_erase(&dev->worker_xa, worker->id); - vhost_task_stop(worker->vtsk); + worker->ops->stop(worker); kfree(worker); } @@ -649,42 +736,115 @@ static void vhost_workers_free(struct vhost_dev *dev) xa_destroy(&dev->worker_xa); } +static void vhost_task_wakeup(struct vhost_worker *worker) +{ + return vhost_task_wake(worker->vtsk); +} + +static void vhost_kthread_wakeup(struct vhost_worker *worker) +{ + wake_up_process(worker->kthread_task); +} + +static void vhost_task_do_stop(struct vhost_worker *worker) +{ + return vhost_task_stop(worker->vtsk); +} + +static void vhost_kthread_do_stop(struct vhost_worker *worker) +{ + kthread_stop(worker->kthread_task); +} + +static int vhost_task_worker_create(struct vhost_worker *worker, + struct vhost_dev *dev, const char 
*name) +{ + struct vhost_task *vtsk; + u32 id; + int ret; + + vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, + worker, name); + if (IS_ERR(vtsk)) + return PTR_ERR(vtsk); + + worker->vtsk = vtsk; + vhost_task_start(vtsk); + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); + if (ret < 0) { + vhost_task_do_stop(worker); + return ret; + } + worker->id = id; + return 0; +} + +static int vhost_kthread_worker_create(struct vhost_worker *worker, + struct vhost_dev *dev, const char *name) +{ + struct task_struct *task; + u32 id; + int ret; + + task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name); + if (IS_ERR(task)) + return PTR_ERR(task); + + worker->kthread_task = task; + wake_up_process(task); + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); + if (ret < 0) + goto stop_worker; + + ret = vhost_attach_task_to_cgroups(worker); + if (ret) + goto stop_worker; + + worker->id = id; + return 0; + +stop_worker: + vhost_kthread_do_stop(worker); + return ret; +} + +static const struct vhost_worker_ops kthread_ops = { + .create = vhost_kthread_worker_create, + .stop = vhost_kthread_do_stop, + .wakeup = vhost_kthread_wakeup, +}; + +static const struct vhost_worker_ops vhost_task_ops = { + .create = vhost_task_worker_create, + .stop = vhost_task_do_stop, + .wakeup = vhost_task_wakeup, +}; + static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) { struct vhost_worker *worker; - struct vhost_task *vtsk; char name[TASK_COMM_LEN]; int ret; - u32 id; + const struct vhost_worker_ops *ops = dev->fork_owner ? 
&vhost_task_ops : + &kthread_ops; worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); if (!worker) return NULL; worker->dev = dev; + worker->ops = ops; snprintf(name, sizeof(name), "vhost-%d", current->pid); - vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, - worker, name); - if (IS_ERR(vtsk)) - goto free_worker; - mutex_init(&worker->mutex); init_llist_head(&worker->work_list); worker->kcov_handle = kcov_common_handle(); - worker->vtsk = vtsk; - - vhost_task_start(vtsk); - - ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); + ret = ops->create(worker, dev, name); if (ret < 0) - goto stop_worker; - worker->id = id; + goto free_worker; return worker; -stop_worker: - vhost_task_stop(vtsk); free_worker: kfree(worker); return NULL; @@ -865,6 +1025,14 @@ long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl, switch (ioctl) { /* dev worker ioctls */ case VHOST_NEW_WORKER: + /* + * vhost_tasks will account for worker threads under the parent's + * NPROC value but kthreads do not. To avoid userspace overflowing + * the system with worker threads fork_owner must be true. + */ + if (!dev->fork_owner) + return -EFAULT; + ret = vhost_new_worker(dev, &state); if (!ret && copy_to_user(argp, &state, sizeof(state))) ret = -EFAULT; @@ -982,6 +1150,7 @@ void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) vhost_dev_cleanup(dev); + dev->fork_owner = fork_from_owner_default; dev->umem = umem; /* We don't need VQ locks below since vhost_dev_cleanup makes sure * VQs aren't running. 
@@ -2135,6 +2304,45 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) goto done; } +#ifdef CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL + if (ioctl == VHOST_SET_FORK_FROM_OWNER) { + /* Only allow modification before owner is set */ + if (vhost_dev_has_owner(d)) { + r = -EBUSY; + goto done; + } + u8 fork_owner_val; + + if (get_user(fork_owner_val, (u8 __user *)argp)) { + r = -EFAULT; + goto done; + } + if (fork_owner_val != VHOST_FORK_OWNER_TASK && + fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { + r = -EINVAL; + goto done; + } + d->fork_owner = !!fork_owner_val; + r = 0; + goto done; + } + if (ioctl == VHOST_GET_FORK_FROM_OWNER) { + u8 fork_owner_val = d->fork_owner; + + if (fork_owner_val != VHOST_FORK_OWNER_TASK && + fork_owner_val != VHOST_FORK_OWNER_KTHREAD) { + r = -EINVAL; + goto done; + } + if (put_user(fork_owner_val, (u8 __user *)argp)) { + r = -EFAULT; + goto done; + } + r = 0; + goto done; + } +#endif + /* You must be the owner to do anything else */ r = vhost_dev_check_owner(d); if (r) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index bb75a292d50c..ab704d84fb34 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -26,7 +26,18 @@ struct vhost_work { unsigned long flags; }; +struct vhost_worker; +struct vhost_dev; + +struct vhost_worker_ops { + int (*create)(struct vhost_worker *worker, struct vhost_dev *dev, + const char *name); + void (*stop)(struct vhost_worker *worker); + void (*wakeup)(struct vhost_worker *worker); +}; + struct vhost_worker { + struct task_struct *kthread_task; struct vhost_task *vtsk; struct vhost_dev *dev; /* Used to serialize device wide flushing with worker swapping. 
*/ @@ -36,6 +47,7 @@ struct vhost_worker { u32 id; int attachment_cnt; bool killed; + const struct vhost_worker_ops *ops; }; /* Poll a file (eventfd or socket) */ @@ -176,6 +188,16 @@ struct vhost_dev { int byte_weight; struct xarray worker_xa; bool use_worker; + /* + * If fork_owner is true we use vhost_tasks to create + * the worker so all settings/limits like cgroups, NPROC, + * scheduler, etc are inherited from the owner. If false, + * we use kthreads and only attach to the same cgroups + * as the owner for compat with older kernels. + * here we use true as default value. + * The default value is set by fork_from_owner_default + */ + bool fork_owner; int (*msg_handler)(struct vhost_dev *dev, u32 asid, struct vhost_iotlb_msg *msg); }; |