summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/vfio/Kconfig1
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c5
-rw-r--r--drivers/vfio/vfio_main.c175
-rw-r--r--include/linux/vfio.h28
4 files changed, 207 insertions, 2 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 6130d00252ed..86c381ceb9a1 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -3,6 +3,7 @@ menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
select IOMMU_API
select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+ select INTERVAL_TREE
help
VFIO provides a framework for secure userspace device drivers.
See Documentation/driver-api/vfio.rst for more details.
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 0d4b49f06b14..0a801aee2f2d 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -2128,6 +2128,11 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
return -EINVAL;
}
+ if (vdev->vdev.log_ops && !(vdev->vdev.log_ops->log_start &&
+ vdev->vdev.log_ops->log_stop &&
+ vdev->vdev.log_ops->log_read_and_clear))
+ return -EINVAL;
+
/*
* Prevent binding to PFs with VFs enabled, the VFs might be in use
* by the host or other users. We cannot capture the VFs if they
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 77264d836d52..27d9186f35d5 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -33,6 +33,8 @@
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include <linux/pm_runtime.h>
+#include <linux/interval_tree.h>
+#include <linux/iova_bitmap.h>
#include "vfio.h"
#define DRIVER_VERSION "0.3"
@@ -1658,6 +1660,167 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
return 0;
}
+/* Ranges should fit into a single kernel page */
+#define LOG_MAX_RANGES \
+ (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
+
+static int
+vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ size_t minsz =
+ offsetofend(struct vfio_device_feature_dma_logging_control,
+ ranges);
+ struct vfio_device_feature_dma_logging_range __user *ranges;
+ struct vfio_device_feature_dma_logging_control control;
+ struct vfio_device_feature_dma_logging_range range;
+ struct rb_root_cached root = RB_ROOT_CACHED;
+ struct interval_tree_node *nodes;
+ u64 iova_end;
+ u32 nnodes;
+ int i, ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET,
+ sizeof(control));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&control, arg, minsz))
+ return -EFAULT;
+
+ nnodes = control.num_ranges;
+ if (!nnodes)
+ return -EINVAL;
+
+ if (nnodes > LOG_MAX_RANGES)
+ return -E2BIG;
+
+ ranges = u64_to_user_ptr(control.ranges);
+ nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
+ GFP_KERNEL);
+ if (!nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < nnodes; i++) {
+ if (copy_from_user(&range, &ranges[i], sizeof(range))) {
+ ret = -EFAULT;
+ goto end;
+ }
+ if (!IS_ALIGNED(range.iova, control.page_size) ||
+ !IS_ALIGNED(range.length, control.page_size)) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ if (check_add_overflow(range.iova, range.length, &iova_end) ||
+ iova_end > ULONG_MAX) {
+ ret = -EOVERFLOW;
+ goto end;
+ }
+
+ nodes[i].start = range.iova;
+ nodes[i].last = range.iova + range.length - 1;
+ if (interval_tree_iter_first(&root, nodes[i].start,
+ nodes[i].last)) {
+ /* Range overlapping */
+ ret = -EINVAL;
+ goto end;
+ }
+ interval_tree_insert(nodes + i, &root);
+ }
+
+ ret = device->log_ops->log_start(device, &root, nnodes,
+ &control.page_size);
+ if (ret)
+ goto end;
+
+ if (copy_to_user(arg, &control, sizeof(control))) {
+ ret = -EFAULT;
+ device->log_ops->log_stop(device);
+ }
+
+end:
+ kfree(nodes);
+ return ret;
+}
+
+static int
+vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ int ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET, 0);
+ if (ret != 1)
+ return ret;
+
+ return device->log_ops->log_stop(device);
+}
+
+static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
+ unsigned long iova, size_t length,
+ void *opaque)
+{
+ struct vfio_device *device = opaque;
+
+ return device->log_ops->log_read_and_clear(device, iova, length, iter);
+}
+
+static int
+vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ size_t minsz =
+ offsetofend(struct vfio_device_feature_dma_logging_report,
+ bitmap);
+ struct vfio_device_feature_dma_logging_report report;
+ struct iova_bitmap *iter;
+ u64 iova_end;
+ int ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_GET,
+ sizeof(report));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&report, arg, minsz))
+ return -EFAULT;
+
+ if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
+ return -EINVAL;
+
+ if (check_add_overflow(report.iova, report.length, &iova_end) ||
+ iova_end > ULONG_MAX)
+ return -EOVERFLOW;
+
+ iter = iova_bitmap_alloc(report.iova, report.length,
+ report.page_size,
+ u64_to_user_ptr(report.bitmap));
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ ret = iova_bitmap_for_each(iter, device,
+ vfio_device_log_read_and_clear);
+
+ iova_bitmap_free(iter);
+ return ret;
+}
+
static int vfio_ioctl_device_feature(struct vfio_device *device,
struct vfio_device_feature __user *arg)
{
@@ -1691,6 +1854,18 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
return vfio_ioctl_device_feature_mig_device_state(
device, feature.flags, arg->data,
feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
+ return vfio_ioctl_device_feature_logging_start(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
+ return vfio_ioctl_device_feature_logging_stop(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
+ return vfio_ioctl_device_feature_logging_report(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
default:
if (unlikely(!device->ops->device_feature))
return -EINVAL;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e05ddc6fe6a5..0e2826559091 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -14,6 +14,7 @@
#include <linux/workqueue.h>
#include <linux/poll.h>
#include <uapi/linux/vfio.h>
+#include <linux/iova_bitmap.h>
struct kvm;
@@ -33,10 +34,11 @@ struct vfio_device {
struct device *dev;
const struct vfio_device_ops *ops;
/*
- * mig_ops is a static property of the vfio_device which must be set
- * prior to registering the vfio_device.
+ * mig_ops/log_ops is a static property of the vfio_device which must
+ * be set prior to registering the vfio_device.
*/
const struct vfio_migration_ops *mig_ops;
+ const struct vfio_log_ops *log_ops;
struct vfio_group *group;
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
@@ -109,6 +111,28 @@ struct vfio_migration_ops {
};
/**
+ * @log_start: Optional callback to ask the device start DMA logging.
+ * @log_stop: Optional callback to ask the device stop DMA logging.
+ * @log_read_and_clear: Optional callback to ask the device read
+ * and clear the dirty DMAs in some given range.
+ *
+ * The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set
+ * of features does not track logging state relative to the device,
+ * therefore the device implementation of vfio_log_ops must handle
+ * arbitrary user requests. This includes rejecting subsequent calls
+ * to log_start without an intervening log_stop, as well as graceful
+ * handling of log_stop and log_read_and_clear from invalid states.
+ */
+struct vfio_log_ops {
+ int (*log_start)(struct vfio_device *device,
+ struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
+ int (*log_stop)(struct vfio_device *device);
+ int (*log_read_and_clear)(struct vfio_device *device,
+ unsigned long iova, unsigned long length,
+ struct iova_bitmap *dirty);
+};
+
+/**
* vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
* @flags: Arg from the device_feature op
* @argsz: Arg from the device_feature op