summaryrefslogtreecommitdiff
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/iotlb.c8
-rw-r--r--drivers/vhost/vdpa.c153
-rw-r--r--drivers/vhost/vhost.c35
3 files changed, 119 insertions, 77 deletions
diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 1f0ca6e44410..0fd3f87e913c 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_free);
* vhost_iotlb_itree_first - return the first overlapped range
* @iotlb: the IOTLB
* @start: start of IOVA range
- * @end: end of IOVA range
+ * @last: last byte in IOVA range
*/
struct vhost_iotlb_map *
vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last)
@@ -159,10 +159,10 @@ vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last)
EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first);
/**
- * vhost_iotlb_itree_first - return the next overlapped range
- * @iotlb: the IOTLB
+ * vhost_iotlb_itree_next - return the next overlapped range
+ * @map: the starting map node
* @start: start of IOVA range
- * @end: end of IOVA range
+ * @last: last byte IOVA range
*/
struct vhost_iotlb_map *
vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 3fab94f88894..62a9bb0efc55 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -353,8 +353,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
struct vdpa_callback cb;
struct vhost_virtqueue *vq;
struct vhost_vring_state s;
- u64 __user *featurep = argp;
- u64 features;
u32 idx;
long r;
@@ -381,18 +379,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
vq->last_avail_idx = vq_state.avail_index;
break;
- case VHOST_GET_BACKEND_FEATURES:
- features = VHOST_VDPA_BACKEND_FEATURES;
- if (copy_to_user(featurep, &features, sizeof(features)))
- return -EFAULT;
- return 0;
- case VHOST_SET_BACKEND_FEATURES:
- if (copy_from_user(&features, featurep, sizeof(features)))
- return -EFAULT;
- if (features & ~VHOST_VDPA_BACKEND_FEATURES)
- return -EOPNOTSUPP;
- vhost_set_backend_features(&v->vdev, features);
- return 0;
}
r = vhost_vring_ioctl(&v->vdev, cmd, argp);
@@ -440,8 +426,20 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
struct vhost_vdpa *v = filep->private_data;
struct vhost_dev *d = &v->vdev;
void __user *argp = (void __user *)arg;
+ u64 __user *featurep = argp;
+ u64 features;
long r;
+ if (cmd == VHOST_SET_BACKEND_FEATURES) {
+ r = copy_from_user(&features, featurep, sizeof(features));
+ if (r)
+ return r;
+ if (features & ~VHOST_VDPA_BACKEND_FEATURES)
+ return -EOPNOTSUPP;
+ vhost_set_backend_features(&v->vdev, features);
+ return 0;
+ }
+
mutex_lock(&d->mutex);
switch (cmd) {
@@ -476,6 +474,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_SET_CONFIG_CALL:
r = vhost_vdpa_set_config_call(v, argp);
break;
+ case VHOST_GET_BACKEND_FEATURES:
+ features = VHOST_VDPA_BACKEND_FEATURES;
+ r = copy_to_user(featurep, &features, sizeof(features));
+ break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
@@ -563,6 +565,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
perm_to_iommu_flags(perm));
}
+ if (r)
+ vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+
return r;
}
@@ -590,21 +595,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list;
- unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
+ struct vm_area_struct **vmas;
unsigned int gup_flags = FOLL_LONGTERM;
- unsigned long npages, cur_base, map_pfn, last_pfn = 0;
- unsigned long locked, lock_limit, pinned, i;
+ unsigned long map_pfn, last_pfn = 0;
+ unsigned long npages, lock_limit;
+ unsigned long i, nmap = 0;
u64 iova = msg->iova;
+ long pinned;
int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1))
return -EEXIST;
- page_list = (struct page **) __get_free_page(GFP_KERNEL);
- if (!page_list)
- return -ENOMEM;
-
if (msg->perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE;
@@ -612,61 +615,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
if (!npages)
return -EINVAL;
+ page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+ vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
+ GFP_KERNEL);
+ if (!page_list || !vmas) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
mmap_read_lock(dev->mm);
- locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- if (locked > lock_limit) {
+ if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
- goto out;
+ goto unlock;
}
- cur_base = msg->uaddr & PAGE_MASK;
- iova &= PAGE_MASK;
+ pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
+ page_list, vmas);
+ if (npages != pinned) {
+ if (pinned < 0) {
+ ret = pinned;
+ } else {
+ unpin_user_pages(page_list, pinned);
+ ret = -ENOMEM;
+ }
+ goto unlock;
+ }
- while (npages) {
- pinned = min_t(unsigned long, npages, list_size);
- ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
- if (ret != pinned)
- goto out;
-
- if (!last_pfn)
- map_pfn = page_to_pfn(page_list[0]);
-
- for (i = 0; i < ret; i++) {
- unsigned long this_pfn = page_to_pfn(page_list[i]);
- u64 csize;
-
- if (last_pfn && (this_pfn != last_pfn + 1)) {
- /* Pin a contiguous chunk of memory */
- csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
- if (vhost_vdpa_map(v, iova, csize,
- map_pfn << PAGE_SHIFT,
- msg->perm))
- goto out;
- map_pfn = this_pfn;
- iova += csize;
+ iova &= PAGE_MASK;
+ map_pfn = page_to_pfn(page_list[0]);
+
+ /* One more iteration to avoid extra vdpa_map() call out of loop. */
+ for (i = 0; i <= npages; i++) {
+ unsigned long this_pfn;
+ u64 csize;
+
+ /* The last chunk may have no valid PFN next to it */
+ this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
+
+ if (last_pfn && (this_pfn == -1UL ||
+ this_pfn != last_pfn + 1)) {
+ /* Pin a contiguous chunk of memory */
+ csize = last_pfn - map_pfn + 1;
+ ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+ if (ret) {
+ /*
+ * Unpin the rest chunks of memory on the
+ * flight with no corresponding vdpa_map()
+ * calls having been made yet. On the other
+ * hand, vdpa_unmap() in the failure path
+ * is in charge of accounting the number of
+ * pinned pages for its own.
+ * This asymmetrical pattern of accounting
+ * is for efficiency to pin all pages at
+ * once, while there is no other callsite
+ * of vdpa_map() than here above.
+ */
+ unpin_user_pages(&page_list[nmap],
+ npages - nmap);
+ goto out;
}
-
- last_pfn = this_pfn;
+ atomic64_add(csize, &dev->mm->pinned_vm);
+ nmap += csize;
+ iova += csize << PAGE_SHIFT;
+ map_pfn = this_pfn;
}
-
- cur_base += ret << PAGE_SHIFT;
- npages -= ret;
+ last_pfn = this_pfn;
}
- /* Pin the rest chunk */
- ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
- map_pfn << PAGE_SHIFT, msg->perm);
+ WARN_ON(nmap != npages);
out:
- if (ret) {
+ if (ret)
vhost_vdpa_unmap(v, msg->iova, msg->size);
- atomic64_sub(npages, &dev->mm->pinned_vm);
- }
+unlock:
mmap_read_unlock(dev->mm);
- free_page((unsigned long)page_list);
+free:
+ kvfree(vmas);
+ kvfree(page_list);
return ret;
}
@@ -808,6 +836,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
err_init_iotlb:
vhost_dev_cleanup(&v->vdev);
+ kfree(vqs);
err:
atomic_dec(&v->opened);
return r;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5857d4eec9d7..9ad45e1d27f0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1290,6 +1290,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
vring_used_t __user *used)
{
+ /* If an IOTLB device is present, the vring addresses are
+ * GIOVAs. Access validation occurs at prefetch time. */
+ if (vq->iotlb)
+ return true;
+
return access_ok(desc, vhost_get_desc_size(vq, num)) &&
access_ok(avail, vhost_get_avail_size(vq, num)) &&
access_ok(used, vhost_get_used_size(vq, num));
@@ -1365,6 +1370,20 @@ bool vhost_log_access_ok(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_log_access_ok);
+static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
+ void __user *log_base,
+ bool log_used,
+ u64 log_addr)
+{
+ /* If an IOTLB device is present, log_addr is a GIOVA that
+ * will never be logged by log_used(). */
+ if (vq->iotlb)
+ return true;
+
+ return !log_used || log_access_ok(log_base, log_addr,
+ vhost_get_used_size(vq, vq->num));
+}
+
/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
static bool vq_log_access_ok(struct vhost_virtqueue *vq,
@@ -1372,8 +1391,7 @@ static bool vq_log_access_ok(struct vhost_virtqueue *vq,
{
return vq_memory_access_ok(log_base, vq->umem,
vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
- (!vq->log_used || log_access_ok(log_base, vq->log_addr,
- vhost_get_used_size(vq, vq->num)));
+ vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr);
}
/* Can we start vq? */
@@ -1383,10 +1401,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
if (!vq_log_access_ok(vq, vq->log_base))
return false;
- /* Access validation occurs at prefetch time with IOTLB */
- if (vq->iotlb)
- return true;
-
return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
}
EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
@@ -1516,10 +1530,9 @@ static long vhost_vring_set_addr(struct vhost_dev *d,
return -EINVAL;
/* Also validate log access for used ring if enabled. */
- if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
- !log_access_ok(vq->log_base, a.log_guest_addr,
- sizeof *vq->used +
- vq->num * sizeof *vq->used->ring))
+ if (!vq_log_used_access_ok(vq, vq->log_base,
+ a.flags & (0x1 << VHOST_VRING_F_LOG),
+ a.log_guest_addr))
return -EINVAL;
}
@@ -2537,7 +2550,7 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
r = vhost_update_used_flags(vq);
if (r)
- vq_err(vq, "Failed to enable notification at %p: %d\n",
+ vq_err(vq, "Failed to disable notification at %p: %d\n",
&vq->used->flags, r);
}
}