-rw-r--r--  drivers/vfio/pci/virtio/common.h   |   4
-rw-r--r--  drivers/vfio/pci/virtio/migrate.c  | 231
2 files changed, 227 insertions(+), 8 deletions(-)
diff --git a/drivers/vfio/pci/virtio/common.h b/drivers/vfio/pci/virtio/common.h
index 3bdfb3ea1174..5704603f0f9d 100644
--- a/drivers/vfio/pci/virtio/common.h
+++ b/drivers/vfio/pci/virtio/common.h
@@ -10,6 +10,8 @@
enum virtiovf_migf_state {
VIRTIOVF_MIGF_STATE_ERROR = 1,
+ VIRTIOVF_MIGF_STATE_PRECOPY = 2,
+ VIRTIOVF_MIGF_STATE_COMPLETE = 3,
};
enum virtiovf_load_state {
@@ -57,6 +59,8 @@ struct virtiovf_migration_file {
/* synchronize access to the file state */
struct mutex lock;
loff_t max_pos;
+ u64 pre_copy_initial_bytes;
+ struct ratelimit_state pre_copy_rl_state;
u64 record_size;
u32 record_tag;
u8 has_obj_id:1;
diff --git a/drivers/vfio/pci/virtio/migrate.c b/drivers/vfio/pci/virtio/migrate.c
index 4fdf6ca17a3a..ee54f4c17857 100644
--- a/drivers/vfio/pci/virtio/migrate.c
+++ b/drivers/vfio/pci/virtio/migrate.c
@@ -26,6 +26,10 @@
/* Initial target buffer size */
#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
+static int
+virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
+ u32 ctx_size);
+
static struct page *
virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
unsigned long offset)
@@ -159,6 +163,41 @@ virtiovf_pci_free_obj_id(struct virtiovf_pci_core_device *virtvdev, u32 obj_id)
VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
}
+static struct virtiovf_data_buffer *
+virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
+{
+ struct virtiovf_data_buffer *buf, *temp_buf;
+ struct list_head free_list;
+
+ INIT_LIST_HEAD(&free_list);
+
+ spin_lock_irq(&migf->list_lock);
+ list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
+ list_del_init(&buf->buf_elm);
+ if (buf->allocated_length >= length) {
+ spin_unlock_irq(&migf->list_lock);
+ goto found;
+ }
+ /*
+ * Prevent holding redundant buffers: move them onto a local free
+ * list and release them at the end, after dropping the spin lock
+ * (&migf->list_lock), to keep its critical section short.
+ */
+ list_add(&buf->buf_elm, &free_list);
+ }
+ spin_unlock_irq(&migf->list_lock);
+ buf = virtiovf_alloc_data_buffer(migf, length);
+
+found:
+ while ((temp_buf = list_first_entry_or_null(&free_list,
+ struct virtiovf_data_buffer, buf_elm))) {
+ list_del(&temp_buf->buf_elm);
+ virtiovf_free_data_buffer(temp_buf);
+ }
+
+ return buf;
+}
+
static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
{
struct virtiovf_data_buffer *entry;
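
The buffer lookup in virtiovf_get_data_buffer() above follows a common kernel pattern: select a reusable entry under the spinlock, but defer the actual frees until the lock has been dropped, since spin_lock_irq() disables interrupts for the whole critical section. A minimal standalone sketch of that pattern; my_pool, my_buf, my_buf_alloc() and my_buf_free() are hypothetical names, not part of this driver:

#include <linux/list.h>
#include <linux/spinlock.h>

struct my_pool {
	spinlock_t lock;
	struct list_head avail;
};

struct my_buf {
	struct list_head elm;
	size_t len;
};

static struct my_buf *my_pool_get(struct my_pool *pool, size_t len)
{
	struct my_buf *buf, *tmp;
	LIST_HEAD(free_list);			/* rejected entries collect here */

	spin_lock_irq(&pool->lock);
	list_for_each_entry_safe(buf, tmp, &pool->avail, elm) {
		list_del_init(&buf->elm);
		if (buf->len >= len) {		/* big enough: reuse it */
			spin_unlock_irq(&pool->lock);
			goto found;
		}
		list_add(&buf->elm, &free_list);	/* too small: free later */
	}
	spin_unlock_irq(&pool->lock);
	buf = my_buf_alloc(pool, len);		/* hypothetical allocator */
found:
	while ((tmp = list_first_entry_or_null(&free_list,
					       struct my_buf, elm))) {
		list_del(&tmp->elm);
		my_buf_free(tmp);		/* freed outside the lock */
	}
	return buf;
}
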
@@ -345,6 +384,7 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len
{
struct virtiovf_migration_file *migf = filp->private_data;
struct virtiovf_data_buffer *vhca_buf;
+ bool first_loop_call = true;
bool end_of_data;
ssize_t done = 0;
@@ -362,6 +402,19 @@ static ssize_t virtiovf_save_read(struct file *filp, char __user *buf, size_t len
ssize_t count;
vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+ if (first_loop_call) {
+ first_loop_call = false;
+ /* Temporary end of file as part of PRE_COPY */
+ if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) {
+ done = -ENOMSG;
+ goto out_unlock;
+ }
+ if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) {
+ done = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
if (end_of_data)
goto out_unlock;
@@ -383,9 +436,101 @@ out_unlock:
return done;
}
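
The -ENOMSG return added above implements the VFIO pre-copy stream convention: while the device is in PRE_COPY, a read() that hits the current end of data reports -ENOMSG as a temporary end of stream rather than EOF or a hard error. A hedged userspace sketch of a read loop that honors this; the fd handling is hypothetical and not taken from this patch:

#include <errno.h>
#include <unistd.h>

/* Drain currently-available pre-copy data; ENOMSG means "try again later". */
static ssize_t drain_precopy(int mig_fd, int dst_fd)
{
	char chunk[65536];
	ssize_t n, total = 0;

	for (;;) {
		n = read(mig_fd, chunk, sizeof(chunk));
		if (n > 0) {
			if (write(dst_fd, chunk, n) != n)
				return -EIO;	/* destination failed */
			total += n;
			continue;
		}
		if (n == 0)
			return total;		/* final end of stream */
		if (errno == ENOMSG)
			return total;		/* temporary end: poll later */
		return -errno;			/* real error */
	}
}
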
+static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct virtiovf_migration_file *migf = filp->private_data;
+ struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
+ struct vfio_precopy_info info = {};
+ loff_t *pos = &filp->f_pos;
+ bool end_of_data = false;
+ unsigned long minsz;
+ u32 ctx_size = 0;
+ int ret;
+
+ if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+ return -ENOTTY;
+
+ minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ mutex_lock(&virtvdev->state_mutex);
+ if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
+ virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+ ret = -EINVAL;
+ goto err_state_unlock;
+ }
+
+ /*
+ * The virtio specification does not define a PRE_COPY concept.
+ * Since the device data can be expected to stay the same for some
+ * period, rate-limit the metadata queries sent to the device.
+ */
+ if (__ratelimit(&migf->pre_copy_rl_state)) {
+ ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
+ VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
+ VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
+ &ctx_size);
+ if (ret)
+ goto err_state_unlock;
+ }
+
+ mutex_lock(&migf->lock);
+ if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
+ ret = -ENODEV;
+ goto err_migf_unlock;
+ }
+
+ if (migf->pre_copy_initial_bytes > *pos) {
+ info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
+ } else {
+ info.dirty_bytes = migf->max_pos - *pos;
+ if (!info.dirty_bytes)
+ end_of_data = true;
+ info.dirty_bytes += ctx_size;
+ }
+
+ if (!end_of_data || !ctx_size) {
+ mutex_unlock(&migf->lock);
+ goto done;
+ }
+
+ mutex_unlock(&migf->lock);
+ /*
+ * We finished transferring the current state and the device has
+ * additional dirty state; read a new state chunk.
+ */
+ ret = virtiovf_read_device_context_chunk(migf, ctx_size);
+ if (ret)
+ /*
+ * The machine is running and the context size can still grow, so
+ * there is no reason to mark the device state as
+ * VIRTIOVF_MIGF_STATE_ERROR.
+ */
+ goto err_state_unlock;
+
+done:
+ virtiovf_state_mutex_unlock(virtvdev);
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+ return 0;
+
+err_migf_unlock:
+ mutex_unlock(&migf->lock);
+err_state_unlock:
+ virtiovf_state_mutex_unlock(virtvdev);
+ return ret;
+}
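
Userspace reaches this handler via VFIO_MIG_GET_PRECOPY_INFO on the migration data fd. A minimal caller-side sketch, assuming <linux/vfio.h> from a kernel with pre-copy support, with error handling reduced to errno:

#include <linux/vfio.h>
#include <sys/ioctl.h>

/* Ask the driver how much pre-copy data remains to be read. */
static int query_precopy(int mig_fd, __u64 *initial, __u64 *dirty)
{
	struct vfio_precopy_info info = {
		.argsz = sizeof(info),
	};

	if (ioctl(mig_fd, VFIO_MIG_GET_PRECOPY_INFO, &info))
		return -1;		/* e.g. ENODEV if the file hit an error */
	*initial = info.initial_bytes;	/* initial device state still to read */
	*dirty = info.dirty_bytes;	/* estimate of data dirtied meanwhile */
	return 0;
}

A migration loop would typically keep reading while initial_bytes is large and move the device to STOP_COPY once the remaining estimate is small enough.
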
+
static const struct file_operations virtiovf_save_fops = {
.owner = THIS_MODULE,
.read = virtiovf_save_read,
+ .unlocked_ioctl = virtiovf_precopy_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.release = virtiovf_release_file,
};
@@ -429,7 +574,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
int nent;
int ret;
- buf = virtiovf_alloc_data_buffer(migf, ctx_size);
+ buf = virtiovf_get_data_buffer(migf, ctx_size);
if (IS_ERR(buf))
return PTR_ERR(buf);
@@ -464,7 +609,7 @@ virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
goto out;
buf->length = res_size;
- header_buf = virtiovf_alloc_data_buffer(migf,
+ header_buf = virtiovf_get_data_buffer(migf,
sizeof(struct virtiovf_migration_header));
if (IS_ERR(header_buf)) {
ret = PTR_ERR(header_buf);
@@ -489,8 +634,43 @@ out:
return ret;
}
+static int
+virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
+{
+ struct virtiovf_migration_file *migf = virtvdev->saving_migf;
+ u32 ctx_size;
+ int ret;
+
+ if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
+ return -ENODEV;
+
+ ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
+ VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
+ VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
+ &ctx_size);
+ if (ret)
+ goto err;
+
+ if (!ctx_size) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = virtiovf_read_device_context_chunk(migf, ctx_size);
+ if (ret)
+ goto err;
+
+ migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
+ return 0;
+
+err:
+ migf->state = VIRTIOVF_MIGF_STATE_ERROR;
+ return ret;
+}
+
static struct virtiovf_migration_file *
-virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev)
+virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
+ bool pre_copy)
{
struct virtiovf_migration_file *migf;
u32 ctx_size;
@@ -541,6 +721,18 @@ virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev)
if (ret)
goto out_clean;
+ if (pre_copy) {
+ migf->pre_copy_initial_bytes = migf->max_pos;
+ /* Arbitrarily set the pre-copy rate limit to 1-second intervals */
+ ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
+ /* Suppress rate-limit messages while the limiter is in use */
+ ratelimit_set_flags(&migf->pre_copy_rl_state,
+ RATELIMIT_MSG_ON_RELEASE);
+ migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
+ } else {
+ migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
+ }
+
return migf;
out_clean:
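
The rate limiter configured above is the stock kernel ratelimit facility: with an interval of HZ and a burst of 1, __ratelimit() returns true at most once per second. A small sketch of the same idiom in isolation; expensive_query() is a hypothetical stand-in for the admin-command round trip, not this driver's call:

#include <linux/ratelimit.h>

static struct ratelimit_state rl;

static void rl_setup(void)
{
	/* allow one event per second, matching the pre-copy path */
	ratelimit_state_init(&rl, 1 * HZ, 1);
	/* don't print "callbacks suppressed" while throttling */
	ratelimit_set_flags(&rl, RATELIMIT_MSG_ON_RELEASE);
}

static void maybe_query(void)
{
	if (__ratelimit(&rl))
		expensive_query();	/* hypothetical device query */
}
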
@@ -950,7 +1142,8 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
return NULL;
}
- if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
if (ret)
@@ -958,7 +1151,8 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
return NULL;
}
- if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
+ if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
if (ret)
return ERR_PTR(ret);
@@ -968,7 +1162,7 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
struct virtiovf_migration_file *migf;
- migf = virtiovf_pci_save_device_data(virtvdev);
+ migf = virtiovf_pci_save_device_data(virtvdev, false);
if (IS_ERR(migf))
return ERR_CAST(migf);
get_file(migf->filp);
@@ -976,7 +1170,9 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
return migf->filp;
}
- if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
+ if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
virtiovf_disable_fds(virtvdev);
return NULL;
}
@@ -997,6 +1193,24 @@ virtiovf_pci_step_device_state_locked(struct virtiovf_pci_core_device *virtvdev,
return NULL;
}
+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
+ (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
+ new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
+ struct virtiovf_migration_file *migf;
+
+ migf = virtiovf_pci_save_device_data(virtvdev, true);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+ get_file(migf->filp);
+ virtvdev->saving_migf = migf;
+ return migf->filp;
+ }
+
+ if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
+ ret = virtiovf_pci_save_device_final_data(virtvdev);
+ return ret ? ERR_PTR(ret) : NULL;
+ }
+
/*
* vfio_mig_get_next_state() does not use arcs other than the above
*/
@@ -1101,7 +1315,8 @@ void virtiovf_set_migratable(struct virtiovf_pci_core_device *virtvdev)
spin_lock_init(&virtvdev->reset_lock);
virtvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
- VFIO_MIGRATION_P2P;
+ VFIO_MIGRATION_P2P |
+ VFIO_MIGRATION_PRE_COPY;
virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
}
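
With VFIO_MIGRATION_PRE_COPY now advertised, userspace can drive the new arcs (e.g. RUNNING -> PRE_COPY -> STOP_COPY) through the standard migration state feature. A hedged sketch of the caller side; device_fd is the VFIO device fd (not the migration data fd), and the struct layout follows <linux/vfio.h>:

#include <linux/vfio.h>
#include <sys/ioctl.h>

/* Request a new migration state; returns the data fd for COPY arcs, else -1. */
static int set_mig_state(int device_fd, enum vfio_device_mig_state state)
{
	char buf[sizeof(struct vfio_device_feature) +
		 sizeof(struct vfio_device_feature_mig_state)] = {};
	struct vfio_device_feature *feature = (void *)buf;
	struct vfio_device_feature_mig_state *mig = (void *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_SET |
			 VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
	mig->device_state = state;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -1;
	return mig->data_fd;	/* valid for PRE_COPY/STOP_COPY arcs */
}

/* Typical pre-copy flow (sketch):
 *   mig_fd = set_mig_state(device_fd, VFIO_DEVICE_STATE_PRE_COPY);
 *   ... read()/VFIO_MIG_GET_PRECOPY_INFO loop while the VM runs ...
 *   set_mig_state(device_fd, VFIO_DEVICE_STATE_STOP_COPY);
 *   ... drain the remaining data, then move to STOP ...
 */
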