diff options
Diffstat (limited to 'drivers/vfio/pci/mlx5')
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.c | 157 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/cmd.h | 11 | ||||
-rw-r--r-- | drivers/vfio/pci/mlx5/main.c | 148 |
3 files changed, 176 insertions, 140 deletions
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index efd1d252cdc9..41a4b0cf4297 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); if (ret) return ret; - if (mvdev->saving_migf->state == - MLX5_MIGF_STATE_PRE_COPY_ERROR) { + /* Upon cleanup, ignore previous pre_copy error state */ + if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR && + !(query_flags & MLX5VF_QUERY_CLEANUP)) { /* * In case we had a PRE_COPY error, only query full * image for final image @@ -121,6 +122,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, } query_flags &= ~MLX5VF_QUERY_INC; } + /* Block incremental query which is state-dependent */ + if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) { + complete(&mvdev->saving_migf->save_comp); + return -ENODEV; + } } MLX5_SET(query_vhca_migration_state_in, in, opcode, @@ -149,6 +155,12 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, return 0; } +static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev) +{ + mvdev->tracker.object_changed = true; + complete(&mvdev->tracker_comp); +} + static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev) { /* Mark the tracker under an error and wake it up if it's running */ @@ -189,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) /* Must be done outside the lock to let it progress */ set_tracker_error(mvdev); mutex_lock(&mvdev->state_mutex); - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); _mlx5vf_free_page_tracker_resources(mvdev); mlx5vf_state_mutex_unlock(mvdev); } @@ -221,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (!MLX5_CAP_GEN(mvdev->mdev, migration)) goto end; + if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))) + goto end; + mvdev->vf_id = pci_iov_vf_id(pdev); if (mvdev->vf_id < 0) goto end; @@ -250,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, mvdev->migrate_cap = 1; mvdev->core_device.vdev.migration_flags = VFIO_MIGRATION_STOP_COPY | - VFIO_MIGRATION_P2P; + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + mvdev->core_device.vdev.mig_ops = mig_ops; init_completion(&mvdev->tracker_comp); if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) mvdev->core_device.vdev.log_ops = log_ops; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && - MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) - mvdev->core_device.vdev.migration_flags |= - VFIO_MIGRATION_PRE_COPY; - if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks)) mvdev->chunk_mode = 1; @@ -402,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) kfree(buf); } +static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages) +{ + unsigned int to_alloc = npages; + struct page **page_list; + unsigned long filled; + unsigned int to_fill; + int ret; + + to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); + page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); + if (!page_list) + return -ENOMEM; + + do { + filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, + page_list); + if (!filled) { + ret = -ENOMEM; + goto err; + } + to_alloc -= filled; + ret = sg_alloc_append_table_from_pages( + &buf->table, page_list, filled, 0, + filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, + GFP_KERNEL_ACCOUNT); + + if (ret) + goto err; + buf->allocated_length += filled * PAGE_SIZE; + /* clean input for another bulk allocation */ + memset(page_list, 0, filled * sizeof(*page_list)); + to_fill = min_t(unsigned int, to_alloc, + PAGE_SIZE / sizeof(*page_list)); + } while (to_alloc > 0); + + kvfree(page_list); + return 0; + +err: + kvfree(page_list); + return ret; +} + struct mlx5_vhca_data_buffer * mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, @@ -608,8 +665,13 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) err: /* The error flow can't run from an interrupt context */ - if (status == -EREMOTEIO) + if (status == -EREMOTEIO) { status = MLX5_GET(save_vhca_state_out, async_data->out, status); + /* Failed in FW, print cmd out failure details */ + mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0, + async_data->out); + } + async_data->status = status; queue_work(migf->mvdev->cb_wq, &async_data->work); } @@ -623,6 +685,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; struct mlx5_vhca_data_buffer *header_buf = NULL; struct mlx5vf_async_data *async_data; + bool pre_copy_cleanup = false; int err; lockdep_assert_held(&mvdev->state_mutex); @@ -633,6 +696,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, if (err) return err; + if ((migf->state == MLX5_MIGF_STATE_PRE_COPY || + migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc) + pre_copy_cleanup = true; + if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) /* * In case we had a PRE_COPY error, SAVE is triggered only for @@ -651,29 +718,27 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, async_data = &migf->async_data; async_data->buf = buf; - async_data->stop_copy_chunk = !track; + async_data->stop_copy_chunk = (!track && !pre_copy_cleanup); async_data->out = kvzalloc(out_size, GFP_KERNEL); if (!async_data->out) { err = -ENOMEM; goto err_out; } - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - if (async_data->stop_copy_chunk) { - u8 header_idx = buf->stop_copy_chunk_num ? - buf->stop_copy_chunk_num - 1 : 0; + if (async_data->stop_copy_chunk) { + u8 header_idx = buf->stop_copy_chunk_num ? + buf->stop_copy_chunk_num - 1 : 0; - header_buf = migf->buf_header[header_idx]; - migf->buf_header[header_idx] = NULL; - } + header_buf = migf->buf_header[header_idx]; + migf->buf_header[header_idx] = NULL; + } - if (!header_buf) { - header_buf = mlx5vf_get_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(header_buf)) { - err = PTR_ERR(header_buf); - goto err_free; - } + if (!header_buf) { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; } } @@ -900,6 +965,29 @@ static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev, return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } +static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev, + struct mlx5_vhca_page_tracker *tracker) +{ + u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + void *obj_context; + void *cmd_hdr; + int err; + + cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context); + tracker->status = MLX5_GET(page_track, obj_context, state); + return 0; +} + static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev, struct mlx5_vhca_cq_buf *buf, int nent, int cqe_size) @@ -957,9 +1045,11 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb); struct mlx5vf_pci_core_device *mvdev = container_of( tracker, struct mlx5vf_pci_core_device, tracker); + struct mlx5_eqe_obj_change *object; struct mlx5_eqe *eqe = data; u8 event_type = (u8)type; u8 queue_type; + u32 obj_id; int qp_num; switch (event_type) { @@ -975,6 +1065,12 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type, break; set_tracker_error(mvdev); break; + case MLX5_EVENT_TYPE_OBJECT_CHANGE: + object = &eqe->data.obj_change; + obj_id = be32_to_cpu(object->obj_id); + if (obj_id == tracker->id) + set_tracker_change_event(mvdev); + break; default: break; } @@ -1634,6 +1730,11 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, goto end; } + if (tracker->is_err) { + err = -EIO; + goto end; + } + mdev = mvdev->mdev; err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length, MLX5_PAGE_TRACK_STATE_REPORTING); @@ -1652,6 +1753,12 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova, dirty, &tracker->status); if (poll_err == CQ_EMPTY) { wait_for_completion(&mvdev->tracker_comp); + if (tracker->object_changed) { + tracker->object_changed = false; + err = mlx5vf_cmd_query_tracker(mdev, tracker); + if (err) + goto end; + } continue; } } diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index f2c7227fa683..df421dc6de04 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -13,9 +13,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> -#define MLX5VF_PRE_COPY_SUPP(mvdev) \ - ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY) - enum mlx5_vf_migf_state { MLX5_MIGF_STATE_ERROR = 1, MLX5_MIGF_STATE_PRE_COPY_ERROR, @@ -25,7 +22,6 @@ enum mlx5_vf_migf_state { }; enum mlx5_vf_load_state { - MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, MLX5_VF_LOAD_STATE_READ_HEADER, MLX5_VF_LOAD_STATE_PREP_HEADER_DATA, MLX5_VF_LOAD_STATE_READ_HEADER_DATA, @@ -162,6 +158,7 @@ struct mlx5_vhca_page_tracker { u32 id; u32 pdn; u8 is_err:1; + u8 object_changed:1; struct mlx5_uars_page *uar; struct mlx5_vhca_cq cq; struct mlx5_vhca_qp *host_qp; @@ -196,6 +193,7 @@ struct mlx5vf_pci_core_device { enum { MLX5VF_QUERY_INC = (1UL << 0), MLX5VF_QUERY_FINAL = (1UL << 1), + MLX5VF_QUERY_CLEANUP = (1UL << 2), }; int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); @@ -226,12 +224,11 @@ struct mlx5_vhca_data_buffer * mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, size_t length, enum dma_data_direction dma_dir); void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, - unsigned int npages); struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, unsigned long offset); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); -void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev, + enum mlx5_vf_migf_state *last_save_state); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf, u8 chunk_num, size_t next_required_umem_size); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index fe09a8c8af95..61d9b0f9146d 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, return NULL; } -int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, - unsigned int npages) -{ - unsigned int to_alloc = npages; - struct page **page_list; - unsigned long filled; - unsigned int to_fill; - int ret; - - to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list)); - page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT); - if (!page_list) - return -ENOMEM; - - do { - filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill, - page_list); - if (!filled) { - ret = -ENOMEM; - goto err; - } - to_alloc -= filled; - ret = sg_alloc_append_table_from_pages( - &buf->table, page_list, filled, 0, - filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, - GFP_KERNEL_ACCOUNT); - - if (ret) - goto err; - buf->allocated_length += filled * PAGE_SIZE; - /* clean input for another bulk allocation */ - memset(page_list, 0, filled * sizeof(*page_list)); - to_fill = min_t(unsigned int, to_alloc, - PAGE_SIZE / sizeof(*page_list)); - } while (to_alloc > 0); - - kvfree(page_list); - return 0; - -err: - kvfree(page_list); - return ret; -} - static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) { mutex_lock(&migf->lock); @@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf, return 0; } -static int -mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, - loff_t requested_length, - const char __user **buf, size_t *len, - loff_t *pos, ssize_t *done) -{ - int ret; - - if (requested_length > MAX_LOAD_SIZE) - return -ENOMEM; - - if (vhca_buf->allocated_length < requested_length) { - ret = mlx5vf_add_migration_pages( - vhca_buf, - DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, - PAGE_SIZE)); - if (ret) - return ret; - } - - while (*len) { - ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos, - done); - if (ret) - return ret; - } - - return 0; -} - static ssize_t mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, struct mlx5_vhca_data_buffer *vhca_buf, @@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE; break; } - case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER: - ret = mlx5vf_resume_read_image_no_header(vhca_buf, - requested_length, - &buf, &len, pos, &done); - if (ret) - goto out_unlock; - break; case MLX5_VF_LOAD_STATE_READ_IMAGE: ret = mlx5vf_resume_read_image(migf, vhca_buf, migf->record_size, @@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) } migf->buf[0] = buf; - if (MLX5VF_PRE_COPY_SUPP(mvdev)) { - buf = mlx5vf_alloc_data_buffer(migf, - sizeof(struct mlx5_vf_migration_header), DMA_NONE); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - goto out_buf; - } - - migf->buf_header[0] = buf; - migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; - } else { - /* Initial state will be to read the image */ - migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER; + buf = mlx5vf_alloc_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_buf; } + migf->buf_header[0] = buf; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); INIT_LIST_HEAD(&migf->buf_list); @@ -1146,7 +1060,8 @@ end: return ERR_PTR(ret); } -void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) +void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev, + enum mlx5_vf_migf_state *last_save_state) { if (mvdev->resuming_migf) { mlx5vf_disable_fd(mvdev->resuming_migf); @@ -1157,6 +1072,8 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) if (mvdev->saving_migf) { mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx); cancel_work_sync(&mvdev->saving_migf->async_data.work); + if (last_save_state) + *last_save_state = mvdev->saving_migf->state; mlx5vf_disable_fd(mvdev->saving_migf); wake_up_interruptible(&mvdev->saving_migf->poll_wait); mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf); @@ -1217,12 +1134,34 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return migf->filp; } - if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || - (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) { + mlx5vf_disable_fds(mvdev, NULL); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { - mlx5vf_disable_fds(mvdev); - return NULL; + struct mlx5_vf_migration_file *migf = mvdev->saving_migf; + struct mlx5_vhca_data_buffer *buf; + enum mlx5_vf_migf_state state; + size_t size; + + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL, + MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP); + if (ret) + return ERR_PTR(ret); + buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE); + if (IS_ERR(buf)) + return ERR_CAST(buf); + /* pre_copy cleanup */ + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, false); + if (ret) { + mlx5vf_put_data_buffer(buf); + return ERR_PTR(ret); + } + mlx5vf_disable_fds(mvdev, &state); + return (state != MLX5_MIGF_STATE_ERROR) ? NULL : ERR_PTR(-EIO); } if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { @@ -1237,14 +1176,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { - if (!MLX5VF_PRE_COPY_SUPP(mvdev)) { - ret = mlx5vf_cmd_load_vhca_state(mvdev, - mvdev->resuming_migf, - mvdev->resuming_migf->buf[0]); - if (ret) - return ERR_PTR(ret); - } - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); return NULL; } @@ -1289,7 +1221,7 @@ again: mvdev->deferred_reset = false; spin_unlock(&mvdev->reset_lock); mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; - mlx5vf_disable_fds(mvdev); + mlx5vf_disable_fds(mvdev, NULL); goto again; } mutex_unlock(&mvdev->state_mutex); |