diff options
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/iotlb.c | 8 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 153 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 35 |
3 files changed, 119 insertions, 77 deletions
diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 1f0ca6e44410..0fd3f87e913c 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_free); * vhost_iotlb_itree_first - return the first overlapped range * @iotlb: the IOTLB * @start: start of IOVA range - * @end: end of IOVA range + * @last: last byte in IOVA range */ struct vhost_iotlb_map * vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last) @@ -159,10 +159,10 @@ vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last) EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first); /** - * vhost_iotlb_itree_first - return the next overlapped range - * @iotlb: the IOTLB + * vhost_iotlb_itree_next - return the next overlapped range + * @map: the starting map node * @start: start of IOVA range - * @end: end of IOVA range + * @last: last byte IOVA range */ struct vhost_iotlb_map * vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 3fab94f88894..62a9bb0efc55 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -353,8 +353,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, struct vdpa_callback cb; struct vhost_virtqueue *vq; struct vhost_vring_state s; - u64 __user *featurep = argp; - u64 features; u32 idx; long r; @@ -381,18 +379,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, vq->last_avail_idx = vq_state.avail_index; break; - case VHOST_GET_BACKEND_FEATURES: - features = VHOST_VDPA_BACKEND_FEATURES; - if (copy_to_user(featurep, &features, sizeof(features))) - return -EFAULT; - return 0; - case VHOST_SET_BACKEND_FEATURES: - if (copy_from_user(&features, featurep, sizeof(features))) - return -EFAULT; - if (features & ~VHOST_VDPA_BACKEND_FEATURES) - return -EOPNOTSUPP; - vhost_set_backend_features(&v->vdev, features); - return 0; } r = vhost_vring_ioctl(&v->vdev, cmd, argp); @@ -440,8 +426,20 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, struct vhost_vdpa *v = filep->private_data; struct vhost_dev *d = &v->vdev; void __user *argp = (void __user *)arg; + u64 __user *featurep = argp; + u64 features; long r; + if (cmd == VHOST_SET_BACKEND_FEATURES) { + r = copy_from_user(&features, featurep, sizeof(features)); + if (r) + return r; + if (features & ~VHOST_VDPA_BACKEND_FEATURES) + return -EOPNOTSUPP; + vhost_set_backend_features(&v->vdev, features); + return 0; + } + mutex_lock(&d->mutex); switch (cmd) { @@ -476,6 +474,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_SET_CONFIG_CALL: r = vhost_vdpa_set_config_call(v, argp); break; + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VDPA_BACKEND_FEATURES; + r = copy_to_user(featurep, &features, sizeof(features)); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) @@ -563,6 +565,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, perm_to_iommu_flags(perm)); } + if (r) + vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + return r; } @@ -590,21 +595,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vhost_dev *dev = &v->vdev; struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; - unsigned long list_size = PAGE_SIZE / sizeof(struct page *); + struct vm_area_struct **vmas; unsigned int gup_flags = FOLL_LONGTERM; - unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long map_pfn, last_pfn = 0; + unsigned long npages, lock_limit; + unsigned long i, nmap = 0; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; - page_list = (struct page **) __get_free_page(GFP_KERNEL); - if (!page_list) - return -ENOMEM; - if (msg->perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; @@ -612,61 +615,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (!npages) return -EINVAL; + page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!page_list || !vmas) { + ret = -ENOMEM; + goto free; + } + mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } - cur_base = msg->uaddr & PAGE_MASK; - iova &= PAGE_MASK; + pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, + page_list, vmas); + if (npages != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } + goto unlock; + } - while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); - if (ret != pinned) - goto out; - - if (!last_pfn) - map_pfn = page_to_pfn(page_list[0]); - - for (i = 0; i < ret; i++) { - unsigned long this_pfn = page_to_pfn(page_list[i]); - u64 csize; - - if (last_pfn && (this_pfn != last_pfn + 1)) { - /* Pin a contiguous chunk of memory */ - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) - goto out; - map_pfn = this_pfn; - iova += csize; + iova &= PAGE_MASK; + map_pfn = page_to_pfn(page_list[0]); + + /* One more iteration to avoid extra vdpa_map() call out of loop. */ + for (i = 0; i <= npages; i++) { + unsigned long this_pfn; + u64 csize; + + /* The last chunk may have no valid PFN next to it */ + this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; + + if (last_pfn && (this_pfn == -1UL || + this_pfn != last_pfn + 1)) { + /* Pin a contiguous chunk of memory */ + csize = last_pfn - map_pfn + 1; + ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT, + map_pfn << PAGE_SHIFT, + msg->perm); + if (ret) { + /* + * Unpin the rest chunks of memory on the + * flight with no corresponding vdpa_map() + * calls having been made yet. On the other + * hand, vdpa_unmap() in the failure path + * is in charge of accounting the number of + * pinned pages for its own. + * This asymmetrical pattern of accounting + * is for efficiency to pin all pages at + * once, while there is no other callsite + * of vdpa_map() than here above. + */ + unpin_user_pages(&page_list[nmap], + npages - nmap); + goto out; } - - last_pfn = this_pfn; + atomic64_add(csize, &dev->mm->pinned_vm); + nmap += csize; + iova += csize << PAGE_SHIFT; + map_pfn = this_pfn; } - - cur_base += ret << PAGE_SHIFT; - npages -= ret; + last_pfn = this_pfn; } - /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, msg->perm); + WARN_ON(nmap != npages); out: - if (ret) { + if (ret) vhost_vdpa_unmap(v, msg->iova, msg->size); - atomic64_sub(npages, &dev->mm->pinned_vm); - } +unlock: mmap_read_unlock(dev->mm); - free_page((unsigned long)page_list); +free: + kvfree(vmas); + kvfree(page_list); return ret; } @@ -808,6 +836,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) err_init_iotlb: vhost_dev_cleanup(&v->vdev); + kfree(vqs); err: atomic_dec(&v->opened); return r; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5857d4eec9d7..9ad45e1d27f0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1290,6 +1290,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, vring_used_t __user *used) { + /* If an IOTLB device is present, the vring addresses are + * GIOVAs. Access validation occurs at prefetch time. */ + if (vq->iotlb) + return true; + return access_ok(desc, vhost_get_desc_size(vq, num)) && access_ok(avail, vhost_get_avail_size(vq, num)) && access_ok(used, vhost_get_used_size(vq, num)); @@ -1365,6 +1370,20 @@ bool vhost_log_access_ok(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_log_access_ok); +static bool vq_log_used_access_ok(struct vhost_virtqueue *vq, + void __user *log_base, + bool log_used, + u64 log_addr) +{ + /* If an IOTLB device is present, log_addr is a GIOVA that + * will never be logged by log_used(). */ + if (vq->iotlb) + return true; + + return !log_used || log_access_ok(log_base, log_addr, + vhost_get_used_size(vq, vq->num)); +} + /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ static bool vq_log_access_ok(struct vhost_virtqueue *vq, @@ -1372,8 +1391,7 @@ static bool vq_log_access_ok(struct vhost_virtqueue *vq, { return vq_memory_access_ok(log_base, vq->umem, vhost_has_feature(vq, VHOST_F_LOG_ALL)) && - (!vq->log_used || log_access_ok(log_base, vq->log_addr, - vhost_get_used_size(vq, vq->num))); + vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); } /* Can we start vq? */ @@ -1383,10 +1401,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq) if (!vq_log_access_ok(vq, vq->log_base)) return false; - /* Access validation occurs at prefetch time with IOTLB */ - if (vq->iotlb) - return true; - return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); @@ -1516,10 +1530,9 @@ static long vhost_vring_set_addr(struct vhost_dev *d, return -EINVAL; /* Also validate log access for used ring if enabled. */ - if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && - !log_access_ok(vq->log_base, a.log_guest_addr, - sizeof *vq->used + - vq->num * sizeof *vq->used->ring)) + if (!vq_log_used_access_ok(vq, vq->log_base, + a.flags & (0x1 << VHOST_VRING_F_LOG), + a.log_guest_addr)) return -EINVAL; } @@ -2537,7 +2550,7 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { r = vhost_update_used_flags(vq); if (r) - vq_err(vq, "Failed to enable notification at %p: %d\n", + vq_err(vq, "Failed to disable notification at %p: %d\n", &vq->used->flags, r); } } |