From e4be66e5f36b8cd2a052ba9e2ba063e6c37f5453 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 27 Feb 2023 13:41:27 -0800 Subject: vhost: use struct_size and size_add to compute flex array sizes The vhost_get_avail_size and vhost_get_used_size functions compute the size of structures with flexible array members with an additional 2 bytes if the VIRTIO_RING_F_EVENT_IDX feature flag is set. Convert these functions to use struct_size() and size_add() instead of coding the calculation by hand. This ensures that the calculations will saturate at SIZE_MAX rather than overflowing. Signed-off-by: Jacob Keller Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: virtualization@lists.linux-foundation.org Cc: kvm@vger.kernel.org Message-Id: <20230227214127.3678392-1-jacob.e.keller@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f11bdbe4c2c5..43fa626d4e44 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -436,8 +436,7 @@ static size_t vhost_get_avail_size(struct vhost_virtqueue *vq, size_t event __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return sizeof(*vq->avail) + - sizeof(*vq->avail->ring) * num + event; + return size_add(struct_size(vq->avail, ring, num), event); } static size_t vhost_get_used_size(struct vhost_virtqueue *vq, @@ -446,8 +445,7 @@ static size_t vhost_get_used_size(struct vhost_virtqueue *vq, size_t event __maybe_unused = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return sizeof(*vq->used) + - sizeof(*vq->used->ring) * num + event; + return size_add(struct_size(vq->used, ring, num), event); } static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, -- cgit v1.2.3 From 9a10cb4de33f4c914bd2e33ac05cff3ddb6a67b0 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:20 -0500 Subject: vhost-scsi: Delay releasing our refcount on the tpg We currently hold the vhost_scsi_mutex the entire time we are running vhost_scsi_clear_endpoint. One of the reasons for this is that it prevents userspace from being able to free the se_tpg from under us after we have called target_undepend_item. However, it forces management operations for for other devices to have to wait on a flakey device's: vhost_scsi_clear_endpoint -> vhost_scsi_flush() call which can which can take a long time. This moves the target_undepend_item call and the tpg unsetup code to after we have stopped new IO from starting up and after we have waited on running IO. We can then release our refcount on the tpg and session knowing our device is no longer accessing them. We can then drop the vhost_scsi_mutex use during thee flush call in later patches in this set, when we have removed other reasons for holding it. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-4-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 61 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 24 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 32d0be968103..502d6803df0b 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1691,11 +1691,10 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, if (!tpg) continue; - mutex_lock(&tpg->tv_tpg_mutex); tv_tport = tpg->tport; if (!tv_tport) { ret = -ENODEV; - goto err_tpg; + goto err_dev; } if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) { @@ -1704,35 +1703,51 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, tv_tport->tport_name, tpg->tport_tpgt, t->vhost_wwpn, t->vhost_tpgt); ret = -EINVAL; - goto err_tpg; + goto err_dev; } + match = true; + } + if (!match) + goto free_vs_tpg; + + /* Prevent new cmds from starting and accessing the tpgs/sessions */ + for (i = 0; i < vs->dev.nvqs; i++) { + vq = &vs->vqs[i].vq; + mutex_lock(&vq->mutex); + vhost_vq_set_backend(vq, NULL); + mutex_unlock(&vq->mutex); + } + /* Make sure cmds are not running before tearing them down. */ + vhost_scsi_flush(vs); + + for (i = 0; i < vs->dev.nvqs; i++) { + vq = &vs->vqs[i].vq; + vhost_scsi_destroy_vq_cmds(vq); + } + + /* + * We can now release our hold on the tpg and sessions and userspace + * can free them after this point. + */ + for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { + target = i; + tpg = vs->vs_tpg[target]; + if (!tpg) + continue; + + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_vhost_count--; tpg->vhost_scsi = NULL; vs->vs_tpg[target] = NULL; - match = true; + mutex_unlock(&tpg->tv_tpg_mutex); - /* - * Release se_tpg->tpg_group.cg_item configfs dependency now - * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. - */ + se_tpg = &tpg->se_tpg; target_undepend_item(&se_tpg->tpg_group.cg_item); } - if (match) { - for (i = 0; i < vs->dev.nvqs; i++) { - vq = &vs->vqs[i].vq; - mutex_lock(&vq->mutex); - vhost_vq_set_backend(vq, NULL); - mutex_unlock(&vq->mutex); - } - /* Make sure cmds are not running before tearing them down. */ - vhost_scsi_flush(vs); - for (i = 0; i < vs->dev.nvqs; i++) { - vq = &vs->vqs[i].vq; - vhost_scsi_destroy_vq_cmds(vq); - } - } +free_vs_tpg: /* * Act as synchronize_rcu to make sure access to * old vs->vs_tpg is finished. @@ -1745,8 +1760,6 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, mutex_unlock(&vhost_scsi_mutex); return 0; -err_tpg: - mutex_unlock(&tpg->tv_tpg_mutex); err_dev: mutex_unlock(&vs->dev.mutex); mutex_unlock(&vhost_scsi_mutex); -- cgit v1.2.3 From eb1b291466376ad4d2f0a42392a2cd9f0e4983e5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:21 -0500 Subject: vhost-scsi: Drop device mutex use in vhost_scsi_do_plug We don't need the device mutex in vhost_scsi_do_plug because: 1. we have the vhost_scsi_mutex so the tpg->vhost_scsi pointer will not change on us and the vhost_scsi can't be freed from under us if it was set. 2. vhost_scsi_clear_endpoint will stop the virtqueues and flush them while holding the vhost_scsi_mutex so we know once vhost_scsi_clear_endpoint has completed that vhost_scsi_do_plug can't send new events and any queued ones have completed. So this patch drops the device mutex use in vhost_scsi_do_plug. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-5-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 502d6803df0b..c945136ecf18 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2003,8 +2003,6 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg, if (!vs) return; - mutex_lock(&vs->dev.mutex); - if (plug) reason = VIRTIO_SCSI_EVT_RESET_RESCAN; else @@ -2016,7 +2014,6 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg, vhost_scsi_send_evt(vs, tpg, lun, VIRTIO_SCSI_T_TRANSPORT_RESET, reason); mutex_unlock(&vq->mutex); - mutex_unlock(&vs->dev.mutex); } static void vhost_scsi_hotplug(struct vhost_scsi_tpg *tpg, struct se_lun *lun) -- cgit v1.2.3 From ced9eb376ab716ec5b06bef8c3e05608b8eb6700 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:22 -0500 Subject: vhost-scsi: Check for a cleared backend before queueing an event We currenly hold the vhost_scsi_mutex while clearing the endpoint and while performing vhost_scsi_do_plug, so tpg->vhost_scsi can't be freed from uder us, and to make sure anything queued is handled by the full call in vhost_scsi_clear_endpoint. This patch removes the need for the vhost_scsi_mutex for the latter case. In the next patches, we won't hold the vhost_scsi_mutex while flushing so this patch adds a check for the clearing of the virtqueue from vhost_scsi_clear_endpoint. We then know that once vhost_scsi_clear_endpoint has cleared the backend that no new events will be queued, and the flush after the vhost_vq_set_backend(vq, NULL) call will see everything that's been queued to that point. So the flush will then handle all events without the need for the vhost_scsi_mutex. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-6-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index c945136ecf18..ba8097fcea43 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2010,9 +2010,17 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg, vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; mutex_lock(&vq->mutex); + /* + * We can't queue events if the backend has been cleared, because + * we could end up queueing an event after the flush. + */ + if (!vhost_vq_get_backend(vq)) + goto unlock; + if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG)) vhost_scsi_send_evt(vs, tpg, lun, VIRTIO_SCSI_T_TRANSPORT_RESET, reason); +unlock: mutex_unlock(&vq->mutex); } -- cgit v1.2.3 From f5ed6f9e82ee62bf805551522dfa2d6c8030bb47 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:23 -0500 Subject: vhost-scsi: Drop vhost_scsi_mutex use in port callouts We are using the vhost_scsi_mutex to make sure vhost_scsi_port_link and vhost_scsi_port_unlink see if vhost_scsi_clear_endpoint has cleared tpg->vhost_scsi and it can't be freed while they are using. However, we currently set the tpg->vhost_scsi pointer while holding tv_tpg_mutex. So, we can just hold that while calling vhost_scsi_hotplug/hotunplug. We then don't need to hold the vhost_scsi_mutex while vhost_scsi_clear_endpoint is holding it and doing a flush which could cause the LUN map/unmap to have to wait on another device's flush. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-7-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ba8097fcea43..d4372a4aff49 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -2040,15 +2040,10 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg, struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); - mutex_lock(&vhost_scsi_mutex); - mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count++; - mutex_unlock(&tpg->tv_tpg_mutex); - vhost_scsi_hotplug(tpg, lun); - - mutex_unlock(&vhost_scsi_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); return 0; } @@ -2059,15 +2054,10 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg, struct vhost_scsi_tpg *tpg = container_of(se_tpg, struct vhost_scsi_tpg, se_tpg); - mutex_lock(&vhost_scsi_mutex); - mutex_lock(&tpg->tv_tpg_mutex); tpg->tv_tpg_port_count--; - mutex_unlock(&tpg->tv_tpg_mutex); - vhost_scsi_hotunplug(tpg, lun); - - mutex_unlock(&vhost_scsi_mutex); + mutex_unlock(&tpg->tv_tpg_mutex); } static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_store( -- cgit v1.2.3 From bea273c7a8712a055cd504a2bb7d76d8d713ba6e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Mar 2023 21:06:24 -0500 Subject: vhost-scsi: Reduce vhost_scsi_mutex use We on longer need to hold the vhost_scsi_mutex the entire time we set/clear the endpoint. The tv_tpg_mutex handles tpg accesses not related to the tpg list, the port link/unlink functions use the tv_tpg_mutex while accessing the tpg->vhost_scsi pointer, vhost_scsi_do_plug will no longer queue events after the virtqueue's backend has been cleared and flushed, and we don't drop our refcount to the tpg until after we have stopped cmds and wait for outstanding cmds to complete. This moves the vhost_scsi_mutex use to it's documented use of being used to access the tpg list. We then don't need to hold it while a flush is being performed causing other device's vhost_scsi_set_endpoint and vhost_scsi_make_tpg/vhost_scsi_drop_tpg calls to have to wait on a flakey device. Signed-off-by: Mike Christie Message-Id: <20230321020624.13323-8-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index d4372a4aff49..3b0b556c57ef 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -229,7 +229,10 @@ struct vhost_scsi_ctx { struct iov_iter out_iter; }; -/* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */ +/* + * Global mutex to protect vhost_scsi TPG list for vhost IOCTLs and LIO + * configfs management operations. + */ static DEFINE_MUTEX(vhost_scsi_mutex); static LIST_HEAD(vhost_scsi_list); @@ -1526,7 +1529,7 @@ out: * vhost_scsi_tpg with an active struct vhost_scsi_nexus * * The lock nesting rule is: - * vhost_scsi_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex + * vs->dev.mutex -> vhost_scsi_mutex -> tpg->tv_tpg_mutex -> vq->mutex */ static int vhost_scsi_set_endpoint(struct vhost_scsi *vs, @@ -1540,7 +1543,6 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, int index, ret, i, len; bool match = false; - mutex_lock(&vhost_scsi_mutex); mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. */ @@ -1561,6 +1563,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (vs->vs_tpg) memcpy(vs_tpg, vs->vs_tpg, len); + mutex_lock(&vhost_scsi_mutex); list_for_each_entry(tpg, &vhost_scsi_list, tv_tpg_list) { mutex_lock(&tpg->tv_tpg_mutex); if (!tpg->tpg_nexus) { @@ -1576,6 +1579,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) { mutex_unlock(&tpg->tv_tpg_mutex); + mutex_unlock(&vhost_scsi_mutex); ret = -EEXIST; goto undepend; } @@ -1590,6 +1594,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (ret) { pr_warn("target_depend_item() failed: %d\n", ret); mutex_unlock(&tpg->tv_tpg_mutex); + mutex_unlock(&vhost_scsi_mutex); goto undepend; } tpg->tv_tpg_vhost_count++; @@ -1599,6 +1604,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, } mutex_unlock(&tpg->tv_tpg_mutex); } + mutex_unlock(&vhost_scsi_mutex); if (match) { memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, @@ -1654,7 +1660,6 @@ undepend: kfree(vs_tpg); out: mutex_unlock(&vs->dev.mutex); - mutex_unlock(&vhost_scsi_mutex); return ret; } @@ -1670,7 +1675,6 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, int index, ret, i; u8 target; - mutex_lock(&vhost_scsi_mutex); mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. */ for (index = 0; index < vs->dev.nvqs; ++index) { @@ -1757,12 +1761,10 @@ free_vs_tpg: vs->vs_tpg = NULL; WARN_ON(vs->vs_events_nr); mutex_unlock(&vs->dev.mutex); - mutex_unlock(&vhost_scsi_mutex); return 0; err_dev: mutex_unlock(&vs->dev.mutex); - mutex_unlock(&vhost_scsi_mutex); return ret; } -- cgit v1.2.3 From 5e68470f4e80a4120e9ecec408f6ab4ad386bd4a Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 23 Mar 2023 13:30:40 +0800 Subject: vdpa: Add eventfd for the vdpa callback Add eventfd for the vdpa callback so that user can signal it directly instead of triggering the callback. It will be used for vhost-vdpa case. Signed-off-by: Xie Yongji Message-Id: <20230323053043.35-9-xieyongji@bytedance.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 2 ++ drivers/virtio/virtio_vdpa.c | 1 + include/linux/vdpa.h | 6 ++++++ 3 files changed, 9 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 7be9d9d8f01c..9cd878e25cff 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -599,9 +599,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, if (vq->call_ctx.ctx) { cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; + cb.trigger = vq->call_ctx.ctx; } else { cb.callback = NULL; cb.private = NULL; + cb.trigger = NULL; } ops->set_vq_cb(vdpa, idx, &cb); vhost_vdpa_setup_vq_irq(v, idx); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index f3826f42b704..2a095f37f26b 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -196,6 +196,7 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, /* Setup virtqueue callback */ cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL; cb.private = info; + cb.trigger = NULL; ops->set_vq_cb(vdpa, index, &cb); ops->set_vq_num(vdpa, index, virtqueue_get_vring_size(vq)); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2313db735773..f2064fa2d2da 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -13,10 +13,16 @@ * struct vdpa_calllback - vDPA callback definition. * @callback: interrupt callback function * @private: the data passed to the callback function + * @trigger: the eventfd for the callback (Optional). + * When it is set, the vDPA driver must guarantee that + * signaling it is functional equivalent to triggering + * the callback. Then vDPA parent can signal it directly + * instead of triggering the callback. */ struct vdpa_callback { irqreturn_t (*callback)(void *data); void *private; + struct eventfd_ctx *trigger; }; /** -- cgit v1.2.3 From 905233af513c0c6971cae0f25280f23be1f6cbe4 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 31 Mar 2023 10:02:08 +0200 Subject: vringh: fix typos in the vringh_init_* documentation Replace `userpace` with `userspace`. Cc: Simon Horman Signed-off-by: Stefano Garzarella Message-Id: <20230331080208.17002-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Acked-by: Jason Wang --- drivers/vhost/vringh.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index a1e27da54481..694462ba3242 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -636,9 +636,9 @@ static inline int xfer_to_user(const struct vringh *vrh, * @features: the feature bits for this ring. * @num: the number of elements. * @weak_barriers: true if we only need memory barriers, not I/O. - * @desc: the userpace descriptor pointer. - * @avail: the userpace avail pointer. - * @used: the userpace used pointer. + * @desc: the userspace descriptor pointer. + * @avail: the userspace avail pointer. + * @used: the userspace used pointer. * * Returns an error if num is invalid: you should check pointers * yourself! @@ -911,9 +911,9 @@ static inline int kern_xfer(const struct vringh *vrh, void *dst, * @features: the feature bits for this ring. * @num: the number of elements. * @weak_barriers: true if we only need memory barriers, not I/O. - * @desc: the userpace descriptor pointer. - * @avail: the userpace avail pointer. - * @used: the userpace used pointer. + * @desc: the userspace descriptor pointer. + * @avail: the userspace avail pointer. + * @used: the userspace used pointer. * * Returns an error if num is invalid. */ @@ -1306,9 +1306,9 @@ static inline int putused_iotlb(const struct vringh *vrh, * @features: the feature bits for this ring. * @num: the number of elements. * @weak_barriers: true if we only need memory barriers, not I/O. - * @desc: the userpace descriptor pointer. - * @avail: the userpace avail pointer. - * @used: the userpace used pointer. + * @desc: the userspace descriptor pointer. + * @avail: the userspace avail pointer. + * @used: the userspace used pointer. * * Returns an error if num is invalid. */ -- cgit v1.2.3 From 9067de4725a299bc1baf11de9f5040fdd0bd05c3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 4 Apr 2023 15:13:19 +0200 Subject: vhost-vdpa: use bind_mm/unbind_mm device callbacks When the user call VHOST_SET_OWNER ioctl and the vDPA device has `use_va` set to true, let's call the bind_mm callback. In this way we can bind the device to the user address space and directly use the user VA. The unbind_mm callback is called during the release after stopping the device. Signed-off-by: Stefano Garzarella Message-Id: <20230404131326.44403-3-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 9cd878e25cff..cd266fa80c4e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -219,6 +219,28 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v) return vdpa_reset(vdpa); } +static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!vdpa->use_va || !ops->bind_mm) + return 0; + + return ops->bind_mm(vdpa, v->vdev.mm); +} + +static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!vdpa->use_va || !ops->unbind_mm) + return; + + ops->unbind_mm(vdpa); +} + static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) { struct vdpa_device *vdpa = v->vdpa; @@ -718,6 +740,17 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; } + if (r) + goto out; + + switch (cmd) { + case VHOST_SET_OWNER: + r = vhost_vdpa_bind_mm(v); + if (r) + vhost_dev_reset_owner(d, NULL); + break; + } +out: mutex_unlock(&d->mutex); return r; } @@ -1289,6 +1322,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_clean_irq(v); vhost_vdpa_reset(v); vhost_dev_stop(&v->vdev); + vhost_vdpa_unbind_mm(v); vhost_vdpa_config_put(v); vhost_vdpa_cleanup(v); mutex_unlock(&d->mutex); -- cgit v1.2.3 From c0371782500c5314741da9ccbfbf0375a0d379fc Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 4 Apr 2023 15:13:20 +0200 Subject: vringh: replace kmap_atomic() with kmap_local_page() kmap_atomic() is deprecated in favor of kmap_local_page() since commit f3ba3c710ac5 ("mm/highmem: Provide kmap_local*"). With kmap_local_page() the mappings are per thread, CPU local, can take page-faults, and can be called from any context (including interrupts). Furthermore, the tasks can be preempted and, when they are scheduled to run again, the kernel virtual addresses are restored and still valid. kmap_atomic() is implemented like a kmap_local_page() which also disables page-faults and preemption (the latter only for !PREEMPT_RT kernels, otherwise it only disables migration). The code within the mappings/un-mappings in getu16_iotlb() and putu16_iotlb() don't depend on the above-mentioned side effects of kmap_atomic(), so that mere replacements of the old API with the new one is all that is required (i.e., there is no need to explicitly add calls to pagefault_disable() and/or preempt_disable()). This commit reuses a "boiler plate" commit message from Fabio, who has already did this change in several places. Cc: "Fabio M. De Francesco" Reviewed-by: Fabio M. De Francesco Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Message-Id: <20230404131326.44403-4-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vringh.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 694462ba3242..a98adc469f19 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1220,10 +1220,10 @@ static inline int getu16_iotlb(const struct vringh *vrh, if (ret < 0) return ret; - kaddr = kmap_atomic(iov.bv_page); + kaddr = kmap_local_page(iov.bv_page); from = kaddr + iov.bv_offset; *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from)); - kunmap_atomic(kaddr); + kunmap_local(kaddr); return 0; } @@ -1241,10 +1241,10 @@ static inline int putu16_iotlb(const struct vringh *vrh, if (ret < 0) return ret; - kaddr = kmap_atomic(iov.bv_page); + kaddr = kmap_local_page(iov.bv_page); to = kaddr + iov.bv_offset; WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val)); - kunmap_atomic(kaddr); + kunmap_local(kaddr); return 0; } -- cgit v1.2.3 From f609d6cbb36ab1c9c7434f67d555c57a2f7d3dde Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 4 Apr 2023 15:13:21 +0200 Subject: vringh: define the stride used for translation Define a macro to be reused in the different parts of the code. Useful for the next patches where we add more arrays to manage also translations with user VA. Suggested-by: Eugenio Perez Martin Signed-off-by: Stefano Garzarella Message-Id: <20230404131326.44403-5-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vringh.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index a98adc469f19..3d5f6bb4ac31 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1141,13 +1141,15 @@ static int iotlb_translate(const struct vringh *vrh, return ret; } +#define IOTLB_IOV_STRIDE 16 + static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, void *src, size_t len) { u64 total_translated = 0; while (total_translated < len) { - struct bio_vec iov[16]; + struct bio_vec iov[IOTLB_IOV_STRIDE]; struct iov_iter iter; u64 translated; int ret; @@ -1180,7 +1182,7 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, u64 total_translated = 0; while (total_translated < len) { - struct bio_vec iov[16]; + struct bio_vec iov[IOTLB_IOV_STRIDE]; struct iov_iter iter; u64 translated; int ret; -- cgit v1.2.3 From 42823a871fd4e17b34034f43b36ae57bd2ed8a67 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 4 Apr 2023 15:17:16 +0200 Subject: vringh: support VA with iotlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vDPA supports the possibility to use user VA in the iotlb messages. So, let's add support for user VA in vringh to use it in the vDPA simulators. Acked-by: Eugenio PĂ©rez Signed-off-by: Stefano Garzarella Message-Id: <20230404131716.45855-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vringh.c | 171 ++++++++++++++++++++++++++++++++++++++++--------- include/linux/vringh.h | 9 +++ 2 files changed, 148 insertions(+), 32 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 3d5f6bb4ac31..955d938eb663 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1094,10 +1094,17 @@ EXPORT_SYMBOL(vringh_need_notify_kern); #if IS_REACHABLE(CONFIG_VHOST_IOTLB) +struct iotlb_vec { + union { + struct iovec *iovec; + struct bio_vec *bvec; + } iov; + size_t count; +}; + static int iotlb_translate(const struct vringh *vrh, u64 addr, u64 len, u64 *translated, - struct bio_vec iov[], - int iov_size, u32 perm) + struct iotlb_vec *ivec, u32 perm) { struct vhost_iotlb_map *map; struct vhost_iotlb *iotlb = vrh->iotlb; @@ -1107,9 +1114,11 @@ static int iotlb_translate(const struct vringh *vrh, spin_lock(vrh->iotlb_lock); while (len > s) { - u64 size, pa, pfn; + uintptr_t io_addr; + size_t io_len; + u64 size; - if (unlikely(ret >= iov_size)) { + if (unlikely(ret >= ivec->count)) { ret = -ENOBUFS; break; } @@ -1124,10 +1133,22 @@ static int iotlb_translate(const struct vringh *vrh, } size = map->size - addr + map->start; - pa = map->addr + addr - map->start; - pfn = pa >> PAGE_SHIFT; - bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size), - pa & (PAGE_SIZE - 1)); + io_len = min(len - s, size); + io_addr = map->addr - map->start + addr; + + if (vrh->use_va) { + struct iovec *iovec = ivec->iov.iovec; + + iovec[ret].iov_len = io_len; + iovec[ret].iov_base = (void __user *)io_addr; + } else { + u64 pfn = io_addr >> PAGE_SHIFT; + struct bio_vec *bvec = ivec->iov.bvec; + + bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len, + io_addr & (PAGE_SIZE - 1)); + } + s += size; addr += size; ++ret; @@ -1146,23 +1167,36 @@ static int iotlb_translate(const struct vringh *vrh, static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, void *src, size_t len) { + struct iotlb_vec ivec; + union { + struct iovec iovec[IOTLB_IOV_STRIDE]; + struct bio_vec bvec[IOTLB_IOV_STRIDE]; + } iov; u64 total_translated = 0; + ivec.iov.iovec = iov.iovec; + ivec.count = IOTLB_IOV_STRIDE; + while (total_translated < len) { - struct bio_vec iov[IOTLB_IOV_STRIDE]; struct iov_iter iter; u64 translated; int ret; ret = iotlb_translate(vrh, (u64)(uintptr_t)src, len - total_translated, &translated, - iov, ARRAY_SIZE(iov), VHOST_MAP_RO); + &ivec, VHOST_MAP_RO); if (ret == -ENOBUFS) - ret = ARRAY_SIZE(iov); + ret = IOTLB_IOV_STRIDE; else if (ret < 0) return ret; - iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated); + if (vrh->use_va) { + iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret, + translated); + } else { + iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret, + translated); + } ret = copy_from_iter(dst, translated, &iter); if (ret < 0) @@ -1179,23 +1213,36 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, void *src, size_t len) { + struct iotlb_vec ivec; + union { + struct iovec iovec[IOTLB_IOV_STRIDE]; + struct bio_vec bvec[IOTLB_IOV_STRIDE]; + } iov; u64 total_translated = 0; + ivec.iov.iovec = iov.iovec; + ivec.count = IOTLB_IOV_STRIDE; + while (total_translated < len) { - struct bio_vec iov[IOTLB_IOV_STRIDE]; struct iov_iter iter; u64 translated; int ret; ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, len - total_translated, &translated, - iov, ARRAY_SIZE(iov), VHOST_MAP_WO); + &ivec, VHOST_MAP_WO); if (ret == -ENOBUFS) - ret = ARRAY_SIZE(iov); + ret = IOTLB_IOV_STRIDE; else if (ret < 0) return ret; - iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated); + if (vrh->use_va) { + iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret, + translated); + } else { + iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret, + translated); + } ret = copy_to_iter(src, translated, &iter); if (ret < 0) @@ -1212,20 +1259,36 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, static inline int getu16_iotlb(const struct vringh *vrh, u16 *val, const __virtio16 *p) { - struct bio_vec iov; - void *kaddr, *from; + struct iotlb_vec ivec; + union { + struct iovec iovec[1]; + struct bio_vec bvec[1]; + } iov; + __virtio16 tmp; int ret; + ivec.iov.iovec = iov.iovec; + ivec.count = 1; + /* Atomic read is needed for getu16 */ - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, - &iov, 1, VHOST_MAP_RO); + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), + NULL, &ivec, VHOST_MAP_RO); if (ret < 0) return ret; - kaddr = kmap_local_page(iov.bv_page); - from = kaddr + iov.bv_offset; - *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from)); - kunmap_local(kaddr); + if (vrh->use_va) { + ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base); + if (ret) + return ret; + } else { + void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page); + void *from = kaddr + ivec.iov.bvec[0].bv_offset; + + tmp = READ_ONCE(*(__virtio16 *)from); + kunmap_local(kaddr); + } + + *val = vringh16_to_cpu(vrh, tmp); return 0; } @@ -1233,20 +1296,36 @@ static inline int getu16_iotlb(const struct vringh *vrh, static inline int putu16_iotlb(const struct vringh *vrh, __virtio16 *p, u16 val) { - struct bio_vec iov; - void *kaddr, *to; + struct iotlb_vec ivec; + union { + struct iovec iovec; + struct bio_vec bvec; + } iov; + __virtio16 tmp; int ret; + ivec.iov.iovec = &iov.iovec; + ivec.count = 1; + /* Atomic write is needed for putu16 */ - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, - &iov, 1, VHOST_MAP_WO); + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), + NULL, &ivec, VHOST_MAP_RO); if (ret < 0) return ret; - kaddr = kmap_local_page(iov.bv_page); - to = kaddr + iov.bv_offset; - WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val)); - kunmap_local(kaddr); + tmp = cpu_to_vringh16(vrh, val); + + if (vrh->use_va) { + ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base); + if (ret) + return ret; + } else { + void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page); + void *to = kaddr + ivec.iov.bvec[0].bv_offset; + + WRITE_ONCE(*(__virtio16 *)to, tmp); + kunmap_local(kaddr); + } return 0; } @@ -1320,11 +1399,39 @@ int vringh_init_iotlb(struct vringh *vrh, u64 features, struct vring_avail *avail, struct vring_used *used) { + vrh->use_va = false; + return vringh_init_kern(vrh, features, num, weak_barriers, desc, avail, used); } EXPORT_SYMBOL(vringh_init_iotlb); +/** + * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing + * user VA. + * @vrh: the vringh to initialize. + * @features: the feature bits for this ring. + * @num: the number of elements. + * @weak_barriers: true if we only need memory barriers, not I/O. + * @desc: the userspace descriptor pointer. + * @avail: the userspace avail pointer. + * @used: the userspace used pointer. + * + * Returns an error if num is invalid. + */ +int vringh_init_iotlb_va(struct vringh *vrh, u64 features, + unsigned int num, bool weak_barriers, + struct vring_desc *desc, + struct vring_avail *avail, + struct vring_used *used) +{ + vrh->use_va = true; + + return vringh_init_kern(vrh, features, num, weak_barriers, + desc, avail, used); +} +EXPORT_SYMBOL(vringh_init_iotlb_va); + /** * vringh_set_iotlb - initialize a vringh for a ring with IOTLB. * @vrh: the vring diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 1991a02c6431..b4edfadf5479 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -32,6 +32,9 @@ struct vringh { /* Can we get away with weak barriers? */ bool weak_barriers; + /* Use user's VA */ + bool use_va; + /* Last available index we saw (ie. where we're up to). */ u16 last_avail_idx; @@ -284,6 +287,12 @@ int vringh_init_iotlb(struct vringh *vrh, u64 features, struct vring_avail *avail, struct vring_used *used); +int vringh_init_iotlb_va(struct vringh *vrh, u64 features, + unsigned int num, bool weak_barriers, + struct vring_desc *desc, + struct vring_avail *avail, + struct vring_used *used); + int vringh_getdesc_iotlb(struct vringh *vrh, struct vringh_kiov *riov, struct vringh_kiov *wiov, -- cgit v1.2.3 From c82729e06644f4e087f5ff0f91b8fb15e03b8890 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Thu, 20 Apr 2023 23:17:34 +0800 Subject: vhost_vdpa: fix unmap process in no-batch mode While using the vdpa device with vIOMMU enabled in the guest VM, when the vdpa device bind to vfio-pci and run testpmd then system will fail to unmap. The test process is Load guest VM --> attach to virtio driver--> bind to vfio-pci driver So the mapping process is 1)batched mode map to normal MR 2)batched mode unmapped the normal MR 3)unmapped all the memory 4)mapped to iommu MR This error happened in step 3). The iotlb was freed in step 2) and the function vhost_vdpa_process_iotlb_msg will return fail Which causes failure. To fix this, we will not remove the AS while the iotlb->nmaps is 0. This will free in the vhost_vdpa_clean Cc: stable@vger.kernel.org Fixes: aaca8373c4b1 ("vhost-vdpa: support ASID based IOTLB API") Signed-off-by: Cindy Lu Message-Id: <20230420151734.860168-1-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index cd266fa80c4e..5133004452a1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -886,11 +886,7 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, if (!v->in_batch) ops->set_map(vdpa, asid, iotlb); } - /* If we are in the middle of batch processing, delay the free - * of AS until BATCH_END. - */ - if (!v->in_batch && !iotlb->nmaps) - vhost_vdpa_remove_as(v, asid); + } static int vhost_vdpa_va_map(struct vhost_vdpa *v, @@ -1147,8 +1143,6 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid, if (v->in_batch && ops->set_map) ops->set_map(vdpa, asid, iotlb); v->in_batch = false; - if (!iotlb->nmaps) - vhost_vdpa_remove_as(v, asid); break; default: r = -EINVAL; -- cgit v1.2.3