summary | refs | log | tree | commit | diff
path: root/drivers
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-05-16 01:13:02 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-05-16 01:13:02 +0300
commit: 3bf83e47b497d2630d2dcb408ec14ad95050cead (patch)
tree: d9fb350f33550a2e95dc81f44f4783ce5b0a3e69 /drivers
parent: b0662be9131d87d8858d34d6134500e109dff958 (diff)
parent: 702809dabdecca807bdd50cfdcc1c980feb2ba62 (diff)
download: linux-3bf83e47b497d2630d2dcb408ec14ad95050cead.tar.xz
Merge tag 'vfio-v7.1-rc4' of https://github.com/awilliam/linux-vfio
Pull VFIO fixes from Alex Williamson:

 - Convert vfio-pci BAR resource requests and iomaps initialization
   from a lazy, on-demand model to an eager pre-allocation model to
   avoid races while preserving legacy error behavior. Fix unchecked
   barmap access in dma-buf export path (Matt Evans)

 - Introduce an implicit unsigned cast in converting vfio-pci device
   offsets to region indexes, closing a potential out-of-bounds access
   through the vfio_pci_ioeventfd() interface (Matt Evans)

 - Fix a dma-buf kref underflow and stuck wait_for_completion() when
   closing a previously revoked dma-buf (Alex Williamson)

* tag 'vfio-v7.1-rc4' of https://github.com/awilliam/linux-vfio:
  vfio/pci: Check BAR resources before exporting a DMABUF
  vfio/pci: Set up BAR resources and maps in vfio_pci_core_enable()
  vfio/pci: Make VFIO_PCI_OFFSET_TO_INDEX() return unsigned
  vfio/pci: fix dma-buf kref underflow after revoke
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/vfio/pci/vfio_pci_core.c 37
-rw-r--r-- drivers/vfio/pci/vfio_pci_dmabuf.c 42
-rw-r--r-- drivers/vfio/pci/vfio_pci_rdwr.c 26
3 files changed, 65 insertions, 40 deletions
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 3f8d093aacf8..050e7542952e 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -483,6 +483,40 @@ static int vfio_pci_core_runtime_resume(struct device *dev)
#endif /* CONFIG_PM */
/*
+ * Eager-request BAR resources, and iomap them. Soft failures are
+ * allowed, and consumers must check the barmap before use in order to
+ * give compatible user-visible behaviour with the previous on-demand
+ * allocation method.
+ */
+static void vfio_pci_core_map_bars(struct vfio_pci_core_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ int bar = i + PCI_STD_RESOURCES;
+
+ vdev->barmap[bar] = IOMEM_ERR_PTR(-ENODEV);
+
+ if (!pci_resource_len(pdev, i))
+ continue;
+
+ if (pci_request_selected_regions(pdev, 1 << bar, "vfio")) {
+ pci_dbg(pdev, "Failed to reserve region %d\n", bar);
+ vdev->barmap[bar] = IOMEM_ERR_PTR(-EBUSY);
+ continue;
+ }
+
+ vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
+ if (!vdev->barmap[bar]) {
+ pci_dbg(pdev, "Failed to iomap region %d\n", bar);
+ pci_release_selected_regions(pdev, 1 << bar);
+ vdev->barmap[bar] = IOMEM_ERR_PTR(-ENOMEM);
+ }
+ }
+}
+
+/*
* The pci-driver core runtime PM routines always save the device state
* before going into suspended state. If the device is going into low power
* state with only with runtime PM ops, then no explicit handling is needed
@@ -568,6 +602,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
+ vfio_pci_core_map_bars(vdev);
return 0;
@@ -648,7 +683,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
bar = i + PCI_STD_RESOURCES;
- if (!vdev->barmap[bar])
+ if (IS_ERR_OR_NULL(vdev->barmap[bar]))
continue;
pci_iounmap(pdev, vdev->barmap[bar]);
pci_release_selected_regions(pdev, 1 << bar);
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index f87fd32e4a01..1a177ce7de54 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -244,9 +244,11 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
return -EINVAL;
/*
- * For PCI the region_index is the BAR number like everything else.
+ * For PCI the region_index is the BAR number like everything
+ * else. Check that PCI resources have been claimed for it.
*/
- if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
+ if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX ||
+ vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index))
return -ENODEV;
dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
@@ -354,19 +356,18 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
if (revoked) {
kref_put(&priv->kref, vfio_pci_dma_buf_done);
wait_for_completion(&priv->comp);
- } else {
/*
- * Kref is initialize again, because when revoke
- * was performed the reference counter was decreased
- * to zero to trigger completion.
+ * Re-arm the registered kref reference and the
+ * completion so the post-revoke state matches the
+ * post-creation state. An un-revoke followed by a
+ * new mapping needs the kref to be non-zero before
+ * kref_get(), and vfio_pci_dma_buf_cleanup()
+ * delegates its drain back through this revoke
+ * path on a possibly-already-revoked dma-buf.
*/
kref_init(&priv->kref);
- /*
- * There is no need to wait as no mapping was
- * performed when the previous status was
- * priv->revoked == true.
- */
reinit_completion(&priv->comp);
+ } else {
dma_resv_lock(priv->dmabuf->resv, NULL);
priv->revoked = false;
dma_resv_unlock(priv->dmabuf->resv);
@@ -382,21 +383,22 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
struct vfio_pci_dma_buf *tmp;
down_write(&vdev->memory_lock);
+
+ /*
+ * Drain any active mappings via the revoke path. The move is
+ * idempotent for dma-bufs already in the revoked state and
+ * leaves every priv with the kref re-armed and the completion
+ * ready, so cleanup itself does not need to participate in kref
+ * bookkeeping.
+ */
+ vfio_pci_dma_buf_move(vdev, true);
+
list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
if (!get_file_active(&priv->dmabuf->file))
continue;
- dma_resv_lock(priv->dmabuf->resv, NULL);
list_del_init(&priv->dmabufs_elm);
priv->vdev = NULL;
- priv->revoked = true;
- dma_buf_invalidate_mappings(priv->dmabuf);
- dma_resv_wait_timeout(priv->dmabuf->resv,
- DMA_RESV_USAGE_BOOKKEEP, false,
- MAX_SCHEDULE_TIMEOUT);
- dma_resv_unlock(priv->dmabuf->resv);
- kref_put(&priv->kref, vfio_pci_dma_buf_done);
- wait_for_completion(&priv->comp);
vfio_device_put_registration(&vdev->vdev);
fput(priv->dmabuf->file);
}
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 4251ee03e146..3bfbb879a005 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -198,27 +198,15 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
+/*
+ * The barmap is set up in vfio_pci_core_enable(). Callers use this
+ * function to check that the BAR resources are requested or that the
+ * pci_iomap() was done.
+ */
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
- struct pci_dev *pdev = vdev->pdev;
- int ret;
- void __iomem *io;
-
- if (vdev->barmap[bar])
- return 0;
-
- ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
- if (ret)
- return ret;
-
- io = pci_iomap(pdev, bar, 0);
- if (!io) {
- pci_release_selected_regions(pdev, 1 << bar);
- return -ENOMEM;
- }
-
- vdev->barmap[bar] = io;
-
+ if (IS_ERR(vdev->barmap[bar]))
+ return PTR_ERR(vdev->barmap[bar]);
return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);