diff options
Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r-- | drivers/vhost/vhost.c | 213 |
1 files changed, 166 insertions, 47 deletions
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ea966b356352..c14c42b95ab8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -37,6 +37,8 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; +static unsigned vhost_zcopy_mask __read_mostly; + #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) @@ -179,6 +181,9 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; + vq->upend_idx = 0; + vq->done_idx = 0; + vq->ubufs = NULL; } static int vhost_worker(void *data) @@ -225,10 +230,28 @@ static int vhost_worker(void *data) return 0; } +static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) +{ + kfree(vq->indirect); + vq->indirect = NULL; + kfree(vq->log); + vq->log = NULL; + kfree(vq->heads); + vq->heads = NULL; + kfree(vq->ubuf_info); + vq->ubuf_info = NULL; +} + +void vhost_enable_zcopy(int vq) +{ + vhost_zcopy_mask |= 0x1 << vq; +} + /* Helper to allocate iovec buffers for all vqs. */ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) { int i; + bool zcopy; for (i = 0; i < dev->nvqs; ++i) { dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * @@ -237,19 +260,21 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) GFP_KERNEL); dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads * UIO_MAXIOV, GFP_KERNEL); - + zcopy = vhost_zcopy_mask & (0x1 << i); + if (zcopy) + dev->vqs[i].ubuf_info = + kmalloc(sizeof *dev->vqs[i].ubuf_info * + UIO_MAXIOV, GFP_KERNEL); if (!dev->vqs[i].indirect || !dev->vqs[i].log || - !dev->vqs[i].heads) + !dev->vqs[i].heads || + (zcopy && !dev->vqs[i].ubuf_info)) goto err_nomem; } return 0; err_nomem: - for (; i >= 0; --i) { - kfree(dev->vqs[i].indirect); - kfree(dev->vqs[i].log); - kfree(dev->vqs[i].heads); - } + for (; i >= 0; --i) + vhost_vq_free_iovecs(&dev->vqs[i]); return -ENOMEM; } @@ -257,14 +282,8 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) { int i; - for (i = 0; i < dev->nvqs; ++i) { - kfree(dev->vqs[i].indirect); - dev->vqs[i].indirect = NULL; - kfree(dev->vqs[i].log); - dev->vqs[i].log = NULL; - kfree(dev->vqs[i].heads); - dev->vqs[i].heads = NULL; - } + for (i = 0; i < dev->nvqs; ++i) + vhost_vq_free_iovecs(&dev->vqs[i]); } long vhost_dev_init(struct vhost_dev *dev, @@ -287,6 +306,7 @@ long vhost_dev_init(struct vhost_dev *dev, dev->vqs[i].log = NULL; dev->vqs[i].indirect = NULL; dev->vqs[i].heads = NULL; + dev->vqs[i].ubuf_info = NULL; dev->vqs[i].dev = dev; mutex_init(&dev->vqs[i].mutex); vhost_vq_reset(dev, dev->vqs + i); @@ -390,6 +410,30 @@ long vhost_dev_reset_owner(struct vhost_dev *dev) return 0; } +/* In case of DMA done not in order in lower device driver for some reason. + * upend_idx is used to track end of used idx, done_idx is used to track head + * of used idx. Once lower device DMA done contiguously, we will signal KVM + * guest used idx. + */ +int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq) +{ + int i; + int j = 0; + + for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { + if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) { + vq->heads[i].len = VHOST_DMA_CLEAR_LEN; + vhost_add_used_and_signal(vq->dev, vq, + vq->heads[i].id, 0); + ++j; + } else + break; + } + if (j) + vq->done_idx = i; + return j; +} + /* Caller should have device mutex */ void vhost_dev_cleanup(struct vhost_dev *dev) { @@ -400,6 +444,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev) vhost_poll_stop(&dev->vqs[i].poll); vhost_poll_flush(&dev->vqs[i].poll); } + /* Wait for all lower device DMAs done. */ + if (dev->vqs[i].ubufs) + vhost_ubuf_put_and_wait(dev->vqs[i].ubufs); + + /* Signal guest as appropriate. */ + vhost_zerocopy_signal_used(&dev->vqs[i]); + if (dev->vqs[i].error_ctx) eventfd_ctx_put(dev->vqs[i].error_ctx); if (dev->vqs[i].error) @@ -578,17 +629,6 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return 0; } -static int init_used(struct vhost_virtqueue *vq, - struct vring_used __user *used) -{ - int r = put_user(vq->used_flags, &used->flags); - - if (r) - return r; - vq->signalled_used_valid = false; - return get_user(vq->last_used_idx, &used->idx); -} - static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) { struct file *eventfp, *filep = NULL, @@ -701,10 +741,6 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) } } - r = init_used(vq, (struct vring_used __user *)(unsigned long) - a.used_user_addr); - if (r) - break; vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); vq->desc = (void __user *)(unsigned long)a.desc_user_addr; vq->avail = (void __user *)(unsigned long)a.avail_user_addr; @@ -959,6 +995,57 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, return 0; } +static int vhost_update_used_flags(struct vhost_virtqueue *vq) +{ + void __user *used; + if (__put_user(vq->used_flags, &vq->used->flags) < 0) + return -EFAULT; + if (unlikely(vq->log_used)) { + /* Make sure the flag is seen before log. */ + smp_wmb(); + /* Log used flag write. */ + used = &vq->used->flags; + log_write(vq->log_base, vq->log_addr + + (used - (void __user *)vq->used), + sizeof vq->used->flags); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } + return 0; +} + +static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) +{ + if (__put_user(vq->avail_idx, vhost_avail_event(vq))) + return -EFAULT; + if (unlikely(vq->log_used)) { + void __user *used; + /* Make sure the event is seen before log. */ + smp_wmb(); + /* Log avail event write */ + used = vhost_avail_event(vq); + log_write(vq->log_base, vq->log_addr + + (used - (void __user *)vq->used), + sizeof *vhost_avail_event(vq)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } + return 0; +} + +int vhost_init_used(struct vhost_virtqueue *vq) +{ + int r; + if (!vq->private_data) + return 0; + + r = vhost_update_used_flags(vq); + if (r) + return r; + vq->signalled_used_valid = false; + return get_user(vq->last_used_idx, &vq->used->idx); +} + static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, struct iovec iov[], int iov_size) { @@ -1430,34 +1517,20 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { - r = put_user(vq->used_flags, &vq->used->flags); + r = vhost_update_used_flags(vq); if (r) { vq_err(vq, "Failed to enable notification at %p: %d\n", &vq->used->flags, r); return false; } } else { - r = put_user(vq->avail_idx, vhost_avail_event(vq)); + r = vhost_update_avail_event(vq, vq->avail_idx); if (r) { vq_err(vq, "Failed to update avail event index at %p: %d\n", vhost_avail_event(vq), r); return false; } } - if (unlikely(vq->log_used)) { - void __user *used; - /* Make sure data is seen before log. */ - smp_wmb(); - used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? - &vq->used->flags : vhost_avail_event(vq); - /* Log used flags or event index entry write. Both are 16 bit - * fields. */ - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof(u16)); - if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); - } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ smp_mb(); @@ -1480,9 +1553,55 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { - r = put_user(vq->used_flags, &vq->used->flags); + r = vhost_update_used_flags(vq); if (r) vq_err(vq, "Failed to enable notification at %p: %d\n", &vq->used->flags, r); } } + +static void vhost_zerocopy_done_signal(struct kref *kref) +{ + struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref, + kref); + wake_up(&ubufs->wait); +} + +struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq, + bool zcopy) +{ + struct vhost_ubuf_ref *ubufs; + /* No zero copy backend? Nothing to count. */ + if (!zcopy) + return NULL; + ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL); + if (!ubufs) + return ERR_PTR(-ENOMEM); + kref_init(&ubufs->kref); + init_waitqueue_head(&ubufs->wait); + ubufs->vq = vq; + return ubufs; +} + +void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs) +{ + kref_put(&ubufs->kref, vhost_zerocopy_done_signal); +} + +void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) +{ + kref_put(&ubufs->kref, vhost_zerocopy_done_signal); + wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); + kfree(ubufs); +} + +void vhost_zerocopy_callback(void *arg) +{ + struct ubuf_info *ubuf = arg; + struct vhost_ubuf_ref *ubufs = ubuf->arg; + struct vhost_virtqueue *vq = ubufs->vq; + + /* set len = 1 to mark this desc buffers done DMA */ + vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; + kref_put(&ubufs->kref, vhost_zerocopy_done_signal); +} |