From 02256acf1e81e42f6338a39020bf2de9807c33d7 Mon Sep 17 00:00:00 2001 From: Joseph Salisbury Date: Mon, 16 Mar 2026 14:56:17 -0400 Subject: vfio: uapi: fix comment typo The file contains a spelling error in a source comment (succes). Typos in comments reduce readability and make text searches less reliable for developers and maintainers. Replace 'succes' with 'success' in the affected comment. This is a comment-only cleanup and does not change behavior. Signed-off-by: Joseph Salisbury Link: https://lore.kernel.org/r/20260316185617.166414-1-joseph.salisbury@oracle.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index bb7b89330d35..63d56c1fbf6f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -141,7 +141,7 @@ struct vfio_info_cap_header { * * Retrieve information about the group. Fills in provided * struct vfio_group_info. Caller sets argsz. - * Return: 0 on succes, -errno on failure. + * Return: 0 on success, -errno on failure. * Availability: Always */ struct vfio_group_status { -- cgit v1.2.3 From d7140b5dde459048da52cfc0494228055f7e2fb8 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 17 Mar 2026 18:17:48 +0200 Subject: vfio: Define uAPI for re-init initial bytes during the PRE_COPY phase As currently defined, initial_bytes is monotonically decreasing and precedes dirty_bytes when reading from the saving file descriptor. The transition from initial_bytes to dirty_bytes is unidirectional and irreversible. The initial_bytes are considered as critical data that is highly recommended to be transferred to the target as part of PRE_COPY, without this data, the PRE_COPY phase would be ineffective. We come to solve the case when a new chunk of critical data is introduced during the PRE_COPY phase and the driver would like to report an entirely new value for the initial_bytes. 
For that, we extend the VFIO_MIG_GET_PRECOPY_INFO ioctl with an output flag named VFIO_PRECOPY_INFO_REINIT to allow drivers to report a new initial_bytes value during the PRE_COPY phase. Currently, existing VFIO_MIG_GET_PRECOPY_INFO implementations don't assign info.flags before copy_to_user(); this effectively echoes userspace-provided flags back as output, preventing the field from being used to report new reliable data from the drivers. Reliable use of the new VFIO_PRECOPY_INFO_REINIT flag requires userspace to explicitly opt in by enabling the VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 device feature. When the caller opts in, the driver may report an entirely new value for initial_bytes. It may be larger, it may be smaller, it may include the previous unread initial_bytes, it may discard the previous unread initial_bytes, up to the driver logic and state. The presence of the VFIO_PRECOPY_INFO_REINIT output flag set by the driver indicates that new initial data is present on the stream. Once the caller sees this flag, the initial_bytes value should be re-evaluated relative to the readiness state for transition to STOP_COPY. Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20260317161753.18964-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 63d56c1fbf6f..5de618a3a5ee 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1266,6 +1266,19 @@ enum vfio_device_mig_state { * The initial_bytes field indicates the amount of initial precopy * data available from the device. This field should have a non-zero initial * value and decrease as migration data is read from the device. + * The presence of the VFIO_PRECOPY_INFO_REINIT output flag indicates + * that new initial data is present on the stream. 
+ * The new initial data may result, for example, from device reconfiguration + * during migration that requires additional initialization data. + * In that case initial_bytes may report a non-zero value irrespective of + * any previously reported values, which progresses towards zero as precopy + * data is read from the data stream. dirty_bytes is also reset + * to zero and represents the state change of the device relative to the new + * initial_bytes. + * VFIO_PRECOPY_INFO_REINIT can be reported only after userspace opts in to + * VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2. Without this opt-in, the flags field + * of struct vfio_precopy_info is reserved for bug-compatibility reasons. + * * It is recommended to leave PRE_COPY for STOP_COPY only after this field * reaches zero. Leaving PRE_COPY earlier might make things slower. * @@ -1301,6 +1314,7 @@ enum vfio_device_mig_state { struct vfio_precopy_info { __u32 argsz; __u32 flags; +#define VFIO_PRECOPY_INFO_REINIT (1 << 0) /* output - new initial data is present */ __aligned_u64 initial_bytes; __aligned_u64 dirty_bytes; }; @@ -1510,6 +1524,16 @@ struct vfio_device_feature_dma_buf { struct vfio_region_dma_range dma_ranges[] __counted_by(nr_ranges); }; +/* + * Enables the migration precopy_info_v2 behaviour. + * + * VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2. + * + * On SET, enables the v2 pre_copy_info behaviour, where the + * vfio_precopy_info.flags is a valid output field. + */ +#define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 12 + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 50ff3f404617c5d15832fec3711978104c4c9efd Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 17 Mar 2026 18:17:49 +0200 Subject: vfio: Add support for VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 Currently, existing VFIO_MIG_GET_PRECOPY_INFO implementations don't assign info.flags before copy_to_user(). 
Because they copy the struct in from userspace first, this effectively echoes userspace-provided flags back as output, preventing the field from being used to report new reliable data from the drivers. Add support for a new device feature named VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2. On SET, enables the v2 pre_copy_info behaviour, where the vfio_precopy_info.flags is a valid output field. Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20260317161753.18964-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_main.c | 21 +++++++++++++++++++++ include/linux/vfio.h | 1 + 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 742477546b15..8666f35fb3f0 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -553,6 +553,7 @@ static void vfio_df_device_last_close(struct vfio_device_file *df) vfio_df_iommufd_unbind(df); else vfio_device_group_unuse_iommu(device); + device->precopy_info_v2 = 0; module_put(device->dev->driver->owner); } @@ -964,6 +965,23 @@ vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device, return 0; } +static int +vfio_ioctl_device_feature_migration_precopy_info_v2(struct vfio_device *device, + u32 flags, size_t argsz) +{ + int ret; + + if (!(device->migration_flags & VFIO_MIGRATION_PRE_COPY)) + return -EINVAL; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0); + if (ret != 1) + return ret; + + device->precopy_info_v2 = 1; + return 0; +} + static int vfio_ioctl_device_feature_migration(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) @@ -1251,6 +1269,9 @@ static int vfio_ioctl_device_feature(struct vfio_device *device, return vfio_ioctl_device_feature_migration_data_size( device, feature.flags, arg->data, feature.argsz - minsz); + case VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2: + return vfio_ioctl_device_feature_migration_precopy_info_v2( + device, feature.flags, 
feature.argsz - minsz); default: if (unlikely(!device->ops->device_feature)) return -ENOTTY; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e90859956514..7c1d33283e04 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -52,6 +52,7 @@ struct vfio_device { struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; + u8 precopy_info_v2; struct kvm *kvm; /* Members below here are private, not for driver use */ -- cgit v1.2.3 From c995498636c704641c9e809c31b59445b48f7adc Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 17 Mar 2026 18:17:50 +0200 Subject: vfio: Adapt drivers to use the core helper vfio_check_precopy_ioctl Introduce a core helper function for VFIO_MIG_GET_PRECOPY_INFO and adapt all drivers to use it. It centralizes the common code and ensures that output flags are cleared on entry, in case the user opts in to VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2. This prevents any unintended echoing of userspace data back to userspace. 
Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20260317161753.18964-4-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 17 ++++------- drivers/vfio/pci/mlx5/main.c | 18 ++++-------- drivers/vfio/pci/qat/main.c | 17 ++++------- drivers/vfio/pci/virtio/migrate.c | 17 ++++------- include/linux/vfio.h | 39 ++++++++++++++++++++++++++ samples/vfio-mdev/mtty.c | 16 ++++------- 6 files changed, 68 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 1d367cff7dcf..bb121f635b9f 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -857,18 +857,12 @@ static long hisi_acc_vf_precopy_ioctl(struct file *filp, struct hisi_acc_vf_core_device *hisi_acc_vdev = migf->hisi_acc_vdev; loff_t *pos = &filp->f_pos; struct vfio_precopy_info info; - unsigned long minsz; int ret; - if (cmd != VFIO_MIG_GET_PRECOPY_INFO) - return -ENOTTY; - - minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - if (info.argsz < minsz) - return -EINVAL; + ret = vfio_check_precopy_ioctl(&hisi_acc_vdev->core_device.vdev, cmd, + arg, &info); + if (ret) + return ret; mutex_lock(&hisi_acc_vdev->state_mutex); if (hisi_acc_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY) { @@ -893,7 +887,8 @@ static long hisi_acc_vf_precopy_ioctl(struct file *filp, mutex_unlock(&migf->lock); mutex_unlock(&hisi_acc_vdev->state_mutex); - return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; + return copy_to_user((void __user *)arg, &info, + offsetofend(struct vfio_precopy_info, dirty_bytes)) ? 
-EFAULT : 0; out: mutex_unlock(&migf->lock); mutex_unlock(&hisi_acc_vdev->state_mutex); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index dbba6173894b..fb541c17c712 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -463,21 +463,14 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, struct mlx5_vhca_data_buffer *buf; struct vfio_precopy_info info = {}; loff_t *pos = &filp->f_pos; - unsigned long minsz; size_t inc_length = 0; bool end_of_data = false; int ret; - if (cmd != VFIO_MIG_GET_PRECOPY_INFO) - return -ENOTTY; - - minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - - if (info.argsz < minsz) - return -EINVAL; + ret = vfio_check_precopy_ioctl(&mvdev->core_device.vdev, cmd, arg, + &info); + if (ret) + return ret; mutex_lock(&mvdev->state_mutex); if (mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && @@ -545,7 +538,8 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, done: mlx5vf_state_mutex_unlock(mvdev); - if (copy_to_user((void __user *)arg, &info, minsz)) + if (copy_to_user((void __user *)arg, &info, + offsetofend(struct vfio_precopy_info, dirty_bytes))) return -EFAULT; return 0; diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c index b982d4ae666c..b3a4b7a55696 100644 --- a/drivers/vfio/pci/qat/main.c +++ b/drivers/vfio/pci/qat/main.c @@ -121,18 +121,12 @@ static long qat_vf_precopy_ioctl(struct file *filp, unsigned int cmd, struct qat_mig_dev *mig_dev = qat_vdev->mdev; struct vfio_precopy_info info; loff_t *pos = &filp->f_pos; - unsigned long minsz; int ret = 0; - if (cmd != VFIO_MIG_GET_PRECOPY_INFO) - return -ENOTTY; - - minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - if (info.argsz < minsz) - return -EINVAL; + ret = 
vfio_check_precopy_ioctl(&qat_vdev->core_device.vdev, cmd, arg, + &info); + if (ret) + return ret; mutex_lock(&qat_vdev->state_mutex); if (qat_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && @@ -160,7 +154,8 @@ out: mutex_unlock(&qat_vdev->state_mutex); if (ret) return ret; - return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; + return copy_to_user((void __user *)arg, &info, + offsetofend(struct vfio_precopy_info, dirty_bytes)) ? -EFAULT : 0; } static ssize_t qat_vf_save_read(struct file *filp, char __user *buf, diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c index 35fa2d6ed611..7e11834ad512 100644 --- a/drivers/vfio/pci/virtio/migrate.c +++ b/drivers/vfio/pci/virtio/migrate.c @@ -443,19 +443,13 @@ static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd, struct vfio_precopy_info info = {}; loff_t *pos = &filp->f_pos; bool end_of_data = false; - unsigned long minsz; u32 ctx_size = 0; int ret; - if (cmd != VFIO_MIG_GET_PRECOPY_INFO) - return -ENOTTY; - - minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - - if (info.argsz < minsz) - return -EINVAL; + ret = vfio_check_precopy_ioctl(&virtvdev->core_device.vdev, cmd, arg, + &info); + if (ret) + return ret; mutex_lock(&virtvdev->state_mutex); if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && @@ -514,7 +508,8 @@ static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd, done: virtiovf_state_mutex_unlock(virtvdev); - if (copy_to_user((void __user *)arg, &info, minsz)) + if (copy_to_user((void __user *)arg, &info, + offsetofend(struct vfio_precopy_info, dirty_bytes))) return -EFAULT; return 0; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 7c1d33283e04..50b474334a19 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -16,6 +16,7 @@ #include #include #include +#include struct kvm; struct iommufd_ctx; @@ -285,6 +286,44 @@ static 
inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, return 1; } +/** + * vfio_check_precopy_ioctl - Validate user input for the VFIO_MIG_GET_PRECOPY_INFO ioctl + * @vdev: The vfio device + * @cmd: Cmd from the ioctl + * @arg: Arg from the ioctl + * @info: Driver pointer to hold the userspace input to the ioctl + * + * For use in a driver's get_precopy_info. Checks that the inputs to the + * VFIO_MIG_GET_PRECOPY_INFO ioctl are correct. + + * Returns 0 on success, otherwise errno. + */ + +static inline int +vfio_check_precopy_ioctl(struct vfio_device *vdev, unsigned int cmd, + unsigned long arg, struct vfio_precopy_info *info) +{ + unsigned long minsz; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info->argsz < minsz) + return -EINVAL; + + /* keep v1 behaviour as is for compatibility reasons */ + if (vdev->precopy_info_v2) + /* flags are output, set its initial value to 0 */ + info->flags = 0; + + return 0; +} + struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, const struct vfio_device_ops *ops); #define vfio_alloc_device(dev_struct, member, dev, ops) \ diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 01a9db84c4ab..69b6d9defbce 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -840,18 +840,11 @@ static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd, struct mdev_state *mdev_state = migf->mdev_state; loff_t *pos = &filp->f_pos; struct vfio_precopy_info info = {}; - unsigned long minsz; int ret; - if (cmd != VFIO_MIG_GET_PRECOPY_INFO) - return -ENOTTY; - - minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); - - if (copy_from_user(&info, (void __user *)arg, minsz)) - return -EFAULT; - if (info.argsz < minsz) - return -EINVAL; + ret = vfio_check_precopy_ioctl(&mdev_state->vdev, cmd, arg, &info); + if (ret) 
+ return ret; mutex_lock(&mdev_state->state_mutex); if (mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY && @@ -878,7 +871,8 @@ static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd, info.initial_bytes = migf->filled_size - *pos; mutex_unlock(&migf->lock); - ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; + ret = copy_to_user((void __user *)arg, &info, + offsetofend(struct vfio_precopy_info, dirty_bytes)) ? -EFAULT : 0; unlock: mtty_state_mutex_unlock(mdev_state); return ret; -- cgit v1.2.3 From 4bee09a5dbd14e3369926b14b4ee14e22ebfc1f6 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 17 Mar 2026 18:17:51 +0200 Subject: net/mlx5: Add IFC bits for migration state Add the relevant IFC bits for querying an extra migration state from the device. Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20260317161753.18964-5-yishaih@nvidia.com Signed-off-by: Alex Williamson --- include/linux/mlx5/mlx5_ifc.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 775cb0c56865..1c8922c58c8f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2173,7 +2173,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 sf_eq_usage[0x1]; u8 reserved_at_d3[0x5]; u8 multiplane[0x1]; - u8 reserved_at_d9[0x7]; + u8 migration_state[0x1]; + u8 reserved_at_da[0x6]; u8 cross_vhca_object_to_object_supported[0x20]; @@ -13280,13 +13281,24 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits { u8 reserved_at_60[0x20]; }; +enum { + MLX5_QUERY_VHCA_MIG_STATE_UNINITIALIZED = 0x0, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_IDLE = 0x1, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_READY = 0x2, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_DIRTY = 0x3, + MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_INIT = 0x4, +}; + struct mlx5_ifc_query_vhca_migration_state_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; - u8 
reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + + u8 migration_state[0x4]; + u8 reserved_at_64[0x1c]; u8 required_umem_size[0x20]; -- cgit v1.2.3 From 555aa178f8d22261d71da74df6267e6e6e97f95a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 27 Mar 2026 17:55:08 +0100 Subject: vfio: unhide vdev->debug_root When debugfs is disabled, the hisilicon driver now fails to build: drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c: In function 'hisi_acc_vfio_debug_init': drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:1671:62: error: 'struct vfio_device' has no member named 'debug_root' 1671 | vfio_dev_migration = debugfs_lookup("migration", vdev->debug_root); | ^~ The driver otherwise relies on dead-code elimination, but this reference fails. The single struct member is not going to make much of a difference for memory consumption, so just keep this visible unconditionally. Signed-off-by: Arnd Bergmann Fixes: b398f91779b8 ("hisi_acc_vfio_pci: register debugfs for hisilicon migration driver") Link: https://lore.kernel.org/r/20260327165521.3779707-1-arnd@kernel.org Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 50b474334a19..31b826efba00 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -74,13 +74,11 @@ struct vfio_device { u8 iommufd_attached:1; #endif u8 cdev_opened:1; -#ifdef CONFIG_DEBUG_FS /* * debug_root is a static property of the vfio_device * which must be set prior to registering the vfio_device. */ struct dentry *debug_root; -#endif }; /** -- cgit v1.2.3