diff options
-rw-r--r-- | drivers/vfio/pci/virtio/common.h | 4 | ||||
-rw-r--r-- | drivers/vfio/pci/virtio/migrate.c | 231 |
2 files changed, 227 insertions, 8 deletions
diff --git a/drivers/vfio/pci/virtio/common.h b/drivers/vfio/pci/virtio/common.h index 3bdfb3ea1174..5704603f0f9d 100644 --- a/drivers/vfio/pci/virtio/common.h +++ b/drivers/vfio/pci/virtio/common.h @@ -10,6 +10,8 @@ enum virtiovf_migf_state { VIRTIOVF_MIGF_STATE_ERROR = 1, + VIRTIOVF_MIGF_STATE_PRECOPY = 2, + VIRTIOVF_MIGF_STATE_COMPLETE = 3, }; enum virtiovf_load_state { @@ -57,6 +59,8 @@ struct virtiovf_migration_file { /* synchronize access to the file state */ struct mutex lock; loff_t max_pos; + u64 pre_copy_initial_bytes; + struct ratelimit_state pre_copy_rl_state; u64 record_size; u32 record_tag; u8 has_obj_id:1; diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c index 4fdf6ca17a3a..ee54f4c17857 100644 --- a/drivers/vfio/pci/virtio/migrate.c +++ b/drivers/vfio/pci/virtio/migrate.c @@ -26,6 +26,10 @@ /* Initial target buffer size */ #define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M +static int +virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, + u32 ctx_size); + static struct page * virtiovf_get_migration_page(struct virtiovf_data_buffer *buf, unsigned long offset) @@ -159,6 +163,41 @@ virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id) VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id); } +static struct virtiovf_data_buffer * +virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length) +{ + struct virtiovf_data_buffer *buf, *temp_buf; + struct list_head free_list; + + INIT_LIST_HEAD(&free_list); + + spin_lock_irq(&migf->list_lock); + list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) { + list_del_init(&buf->buf_elm); + if (buf->allocated_length >= length) { + spin_unlock_irq(&migf->list_lock); + goto found; + } + /* + * Prevent holding redundant buffers. Put in a free + * list and call at the end not under the spin lock + * (&migf->list_lock) to minimize its scope usage. + */ + list_add(&buf->buf_elm, &free_list); + } + spin_unlock_irq(&migf->list_lock); + buf = virtiovf_alloc_data_buffer(migf, length); + +found: + while ((temp_buf = list_first_entry_or_null(&free_list, + struct virtiovf_data_buffer, buf_elm))) { + list_del(&temp_buf->buf_elm); + virtiovf_free_data_buffer(temp_buf); + } + + return buf; +} + static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf) { struct virtiovf_data_buffer *entry; @@ -345,6 +384,7 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le { struct virtiovf_migration_file *migf = filp->private_data; struct virtiovf_data_buffer *vhca_buf; + bool first_loop_call = true; bool end_of_data; ssize_t done = 0; @@ -362,6 +402,19 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t le ssize_t count; vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data); + if (first_loop_call) { + first_loop_call = false; + /* Temporary end of file as part of PRE_COPY */ + if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) { + done = -ENOMSG; + goto out_unlock; + } + if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) { + done = -EINVAL; + goto out_unlock; + } + } + if (end_of_data) goto out_unlock; @@ -383,9 +436,101 @@ out_unlock: return done; } +static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct virtiovf_migration_file *migf = filp->private_data; + struct virtiovf_pci_core_device *virtvdev = migf->virtvdev; + struct vfio_precopy_info info = {}; + loff_t *pos = &filp->f_pos; + bool end_of_data = false; + unsigned long minsz; + u32 ctx_size = 0; + int ret; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&virtvdev->state_mutex); + if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && + virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { + ret = -EINVAL; + goto err_state_unlock; + } + + /* + * The virtio specification does not include a PRE_COPY concept. + * Since we can expect the data to remain the same for a certain period, + * we use a rate limiter mechanism before making a call to the device. + */ + if (__ratelimit(&migf->pre_copy_rl_state)) { + + ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, + VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id, + VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, + &ctx_size); + if (ret) + goto err_state_unlock; + } + + mutex_lock(&migf->lock); + if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) { + ret = -ENODEV; + goto err_migf_unlock; + } + + if (migf->pre_copy_initial_bytes > *pos) { + info.initial_bytes = migf->pre_copy_initial_bytes - *pos; + } else { + info.dirty_bytes = migf->max_pos - *pos; + if (!info.dirty_bytes) + end_of_data = true; + info.dirty_bytes += ctx_size; + } + + if (!end_of_data || !ctx_size) { + mutex_unlock(&migf->lock); + goto done; + } + + mutex_unlock(&migf->lock); + /* + * We finished transferring the current state and the device has a + * dirty state, read a new state. + */ + ret = virtiovf_read_device_context_chunk(migf, ctx_size); + if (ret) + /* + * The machine is running, and context size could be grow, so no reason to mark + * the device state as VIRTIOVF_MIGF_STATE_ERROR. + */ + goto err_state_unlock; + +done: + virtiovf_state_mutex_unlock(virtvdev); + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + return 0; + +err_migf_unlock: + mutex_unlock(&migf->lock); +err_state_unlock: + virtiovf_state_mutex_unlock(virtvdev); + return ret; +} + static const struct file_operations virtiovf_save_fops = { .owner = THIS_MODULE, .read = virtiovf_save_read, + .unlocked_ioctl = virtiovf_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = virtiovf_release_file, }; @@ -429,7 +574,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, int nent; int ret; - buf = virtiovf_alloc_data_buffer(migf, ctx_size); + buf = virtiovf_get_data_buffer(migf, ctx_size); if (IS_ERR(buf)) return PTR_ERR(buf); @@ -464,7 +609,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf, goto out; buf->length = res_size; - header_buf = virtiovf_alloc_data_buffer(migf, + header_buf = virtiovf_get_data_buffer(migf, sizeof(struct virtiovf_migration_header)); if (IS_ERR(header_buf)) { ret = PTR_ERR(header_buf); @@ -489,8 +634,43 @@ out: return ret; } +static int +virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev) +{ + struct virtiovf_migration_file *migf = virtvdev->saving_migf; + u32 ctx_size; + int ret; + + if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) + return -ENODEV; + + ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev, + VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id, + VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE, + &ctx_size); + if (ret) + goto err; + + if (!ctx_size) { + ret = -EINVAL; + goto err; + } + + ret = virtiovf_read_device_context_chunk(migf, ctx_size); + if (ret) + goto err; + + migf->state = VIRTIOVF_MIGF_STATE_COMPLETE; + return 0; + +err: + migf->state = VIRTIOVF_MIGF_STATE_ERROR; + return ret; +} + static struct virtiovf_migration_file * -virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev) +virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev, + bool pre_copy) { struct virtiovf_migration_file *migf; u32 ctx_size; @@ -541,6 +721,18 @@ virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev) if (ret) goto out_clean; + if (pre_copy) { + migf->pre_copy_initial_bytes = migf->max_pos; + /* Arbitrarily set the pre-copy rate limit to 1-second intervals */ + ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1); + /* Prevent any rate messages upon its usage */ + ratelimit_set_flags(&migf->pre_copy_rl_state, + RATELIMIT_MSG_ON_RELEASE); + migf->state = VIRTIOVF_MIGF_STATE_PRECOPY; + } else { + migf->state = VIRTIOVF_MIGF_STATE_COMPLETE; + } + return migf; out_clean: @@ -950,7 +1142,8 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev, return NULL; } - if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) { + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED)); if (ret) @@ -958,7 +1151,8 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev, return NULL; } - if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) { + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) { ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0); if (ret) return ERR_PTR(ret); @@ -968,7 +1162,7 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev, if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { struct virtiovf_migration_file *migf; - migf = virtiovf_pci_save_device_data(virtvdev); + migf = virtiovf_pci_save_device_data(virtvdev, false); if (IS_ERR(migf)) return ERR_CAST(migf); get_file(migf->filp); @@ -976,7 +1170,9 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev, return migf->filp; } - if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) { + if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { virtiovf_disable_fds(virtvdev); return NULL; } @@ -997,6 +1193,24 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev, return NULL; } + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && + new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { + struct virtiovf_migration_file *migf; + + migf = virtiovf_pci_save_device_data(virtvdev, true); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + virtvdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) { + ret = virtiovf_pci_save_device_final_data(virtvdev); + return ret ? ERR_PTR(ret) : NULL; + } + /* * vfio_mig_get_next_state() does not use arcs other than the above */ @@ -1101,7 +1315,8 @@ void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev) spin_lock_init(&virtvdev->reset_lock); virtvdev->core_device.vdev.migration_flags = VFIO_MIGRATION_STOP_COPY | - VFIO_MIGRATION_P2P; + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops; } |