summaryrefslogtreecommitdiff
path: root/drivers/vfio
diff options
context:
space:
mode:
authorYishai Hadas <yishaih@nvidia.com>2024-03-06 13:56:24 +0300
committerAlex Williamson <alex.williamson@redhat.com>2024-03-11 21:02:59 +0300
commit821b8f6bf84897aeceb230ff4d446b8aa5336acd (patch)
tree27dc1ac028a13ba6f732c28aec9df706bfddb12c /drivers/vfio
parent626f534d774caaccd6861fbccee45a1a6a8781b3 (diff)
downloadlinux-821b8f6bf84897aeceb230ff4d446b8aa5336acd.tar.xz
vfio/mlx5: Enforce PRE_COPY support
Enable live migration only once the firmware supports PRE_COPY. PRE_COPY has been supported by the firmware for a long time already [1] and is required to achieve a low downtime upon live migration. This lets us clean up some old code that is not applicable those days while PRE_COPY is fully supported by the firmware. [1] The minimum firmware version that supports PRE_COPY is 28.36.1010, it was released in January 2023. No firmware without PRE_COPY support ever available to users. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20240306105624.114830-1-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c83
-rw-r--r--drivers/vfio/pci/mlx5/cmd.h6
-rw-r--r--drivers/vfio/pci/mlx5/main.c109
3 files changed, 71 insertions, 127 deletions
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index c54bcd5d0917..41a4b0cf4297 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
if (!MLX5_CAP_GEN(mvdev->mdev, migration))
goto end;
+ if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
+ MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
+ goto end;
+
mvdev->vf_id = pci_iov_vf_id(pdev);
if (mvdev->vf_id < 0)
goto end;
@@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
mvdev->migrate_cap = 1;
mvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
- VFIO_MIGRATION_P2P;
+ VFIO_MIGRATION_P2P |
+ VFIO_MIGRATION_PRE_COPY;
+
mvdev->core_device.vdev.mig_ops = mig_ops;
init_completion(&mvdev->tracker_comp);
if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
mvdev->core_device.vdev.log_ops = log_ops;
- if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
- MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
- mvdev->core_device.vdev.migration_flags |=
- VFIO_MIGRATION_PRE_COPY;
-
if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
mvdev->chunk_mode = 1;
@@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
kfree(buf);
}
+static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+ unsigned int npages)
+{
+ unsigned int to_alloc = npages;
+ struct page **page_list;
+ unsigned long filled;
+ unsigned int to_fill;
+ int ret;
+
+ to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
+ page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
+ if (!page_list)
+ return -ENOMEM;
+
+ do {
+ filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
+ page_list);
+ if (!filled) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ to_alloc -= filled;
+ ret = sg_alloc_append_table_from_pages(
+ &buf->table, page_list, filled, 0,
+ filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
+ GFP_KERNEL_ACCOUNT);
+
+ if (ret)
+ goto err;
+ buf->allocated_length += filled * PAGE_SIZE;
+ /* clean input for another bulk allocation */
+ memset(page_list, 0, filled * sizeof(*page_list));
+ to_fill = min_t(unsigned int, to_alloc,
+ PAGE_SIZE / sizeof(*page_list));
+ } while (to_alloc > 0);
+
+ kvfree(page_list);
+ return 0;
+
+err:
+ kvfree(page_list);
+ return ret;
+}
+
struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
size_t length,
@@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
goto err_out;
}
- if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
- if (async_data->stop_copy_chunk) {
- u8 header_idx = buf->stop_copy_chunk_num ?
- buf->stop_copy_chunk_num - 1 : 0;
+ if (async_data->stop_copy_chunk) {
+ u8 header_idx = buf->stop_copy_chunk_num ?
+ buf->stop_copy_chunk_num - 1 : 0;
- header_buf = migf->buf_header[header_idx];
- migf->buf_header[header_idx] = NULL;
- }
+ header_buf = migf->buf_header[header_idx];
+ migf->buf_header[header_idx] = NULL;
+ }
- if (!header_buf) {
- header_buf = mlx5vf_get_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
- if (IS_ERR(header_buf)) {
- err = PTR_ERR(header_buf);
- goto err_free;
- }
+ if (!header_buf) {
+ header_buf = mlx5vf_get_data_buffer(migf,
+ sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ if (IS_ERR(header_buf)) {
+ err = PTR_ERR(header_buf);
+ goto err_free;
}
}
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 707393df36c4..df421dc6de04 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -13,9 +13,6 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
-#define MLX5VF_PRE_COPY_SUPP(mvdev) \
- ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
-
enum mlx5_vf_migf_state {
MLX5_MIGF_STATE_ERROR = 1,
MLX5_MIGF_STATE_PRE_COPY_ERROR,
@@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
};
enum mlx5_vf_load_state {
- MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
MLX5_VF_LOAD_STATE_READ_HEADER,
MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
@@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
size_t length, enum dma_data_direction dma_dir);
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
- unsigned int npages);
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
unsigned long offset);
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 3982fcf60cf2..61d9b0f9146d 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
return NULL;
}
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
- unsigned int npages)
-{
- unsigned int to_alloc = npages;
- struct page **page_list;
- unsigned long filled;
- unsigned int to_fill;
- int ret;
-
- to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
- page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
- if (!page_list)
- return -ENOMEM;
-
- do {
- filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
- page_list);
- if (!filled) {
- ret = -ENOMEM;
- goto err;
- }
- to_alloc -= filled;
- ret = sg_alloc_append_table_from_pages(
- &buf->table, page_list, filled, 0,
- filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
- GFP_KERNEL_ACCOUNT);
-
- if (ret)
- goto err;
- buf->allocated_length += filled * PAGE_SIZE;
- /* clean input for another bulk allocation */
- memset(page_list, 0, filled * sizeof(*page_list));
- to_fill = min_t(unsigned int, to_alloc,
- PAGE_SIZE / sizeof(*page_list));
- } while (to_alloc > 0);
-
- kvfree(page_list);
- return 0;
-
-err:
- kvfree(page_list);
- return ret;
-}
-
static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
{
mutex_lock(&migf->lock);
@@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
return 0;
}
-static int
-mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
- loff_t requested_length,
- const char __user **buf, size_t *len,
- loff_t *pos, ssize_t *done)
-{
- int ret;
-
- if (requested_length > MAX_LOAD_SIZE)
- return -ENOMEM;
-
- if (vhca_buf->allocated_length < requested_length) {
- ret = mlx5vf_add_migration_pages(
- vhca_buf,
- DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
- PAGE_SIZE));
- if (ret)
- return ret;
- }
-
- while (*len) {
- ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
- done);
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
static ssize_t
mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
struct mlx5_vhca_data_buffer *vhca_buf,
@@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
break;
}
- case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
- ret = mlx5vf_resume_read_image_no_header(vhca_buf,
- requested_length,
- &buf, &len, pos, &done);
- if (ret)
- goto out_unlock;
- break;
case MLX5_VF_LOAD_STATE_READ_IMAGE:
ret = mlx5vf_resume_read_image(migf, vhca_buf,
migf->record_size,
@@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
}
migf->buf[0] = buf;
- if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
- buf = mlx5vf_alloc_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
- if (IS_ERR(buf)) {
- ret = PTR_ERR(buf);
- goto out_buf;
- }
-
- migf->buf_header[0] = buf;
- migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
- } else {
- /* Initial state will be to read the image */
- migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
+ buf = mlx5vf_alloc_data_buffer(migf,
+ sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto out_buf;
}
+ migf->buf_header[0] = buf;
+ migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+
stream_open(migf->filp->f_inode, migf->filp);
mutex_init(&migf->lock);
INIT_LIST_HEAD(&migf->buf_list);
@@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
}
if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
- if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
- ret = mlx5vf_cmd_load_vhca_state(mvdev,
- mvdev->resuming_migf,
- mvdev->resuming_migf->buf[0]);
- if (ret)
- return ERR_PTR(ret);
- }
mlx5vf_disable_fds(mvdev, NULL);
return NULL;
}