From d3eeb1d77c5d0af9df442db63722928238310a86 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:31 -0400 Subject: xen/gntdev: use mmu_interval_notifier_insert gntdev simply wants to monitor a specific VMA for any notifier events, this can be done straightforwardly using mmu_interval_notifier_insert() over the VMA's VA range. The notifier should be attached until the original VMA is destroyed. It is unclear if any of this is even sane, but at least a lot of duplicate code is removed. Link: https://lore.kernel.org/r/20191112202231.3856-15-jgg@ziepe.ca Reviewed-by: Boris Ostrovsky Signed-off-by: Jason Gunthorpe --- drivers/xen/gntdev.c | 179 ++++++++++++++------------------------------------- 1 file changed, 48 insertions(+), 131 deletions(-) (limited to 'drivers/xen/gntdev.c') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 81401f386c9c..a04ddf2a68af 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -63,7 +63,6 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " static atomic_t pages_mapped = ATOMIC_INIT(0); static int use_ptemod; -#define populate_freeable_maps use_ptemod static int unmap_grant_pages(struct gntdev_grant_map *map, int offset, int pages); @@ -249,12 +248,6 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) evtchn_put(map->notify.event); } - if (populate_freeable_maps && priv) { - mutex_lock(&priv->lock); - list_del(&map->next); - mutex_unlock(&priv->lock); - } - if (map->pages && !use_ptemod) unmap_grant_pages(map, 0, map->count); gntdev_free_map(map); @@ -444,16 +437,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma) pr_debug("gntdev_vma_close %p\n", vma); if (use_ptemod) { - /* It is possible that an mmu notifier could be running - * concurrently, so take priv->lock to ensure that the vma won't - * vanishing during the unmap_grant_pages call, since we will - * spin here until that completes. Such a concurrent call will - * not do any unmapping, since that has been done prior to - * closing the vma, but it may still iterate the unmap_ops list. - */ - mutex_lock(&priv->lock); + WARN_ON(map->vma != vma); + mmu_interval_notifier_remove(&map->notifier); map->vma = NULL; - mutex_unlock(&priv->lock); } vma->vm_private_data = NULL; gntdev_put_map(priv, map); @@ -475,109 +461,44 @@ static const struct vm_operations_struct gntdev_vmops = { /* ------------------------------------------------------------------ */ -static bool in_range(struct gntdev_grant_map *map, - unsigned long start, unsigned long end) -{ - if (!map->vma) - return false; - if (map->vma->vm_start >= end) - return false; - if (map->vma->vm_end <= start) - return false; - - return true; -} - -static int unmap_if_in_range(struct gntdev_grant_map *map, - unsigned long start, unsigned long end, - bool blockable) +static bool gntdev_invalidate(struct mmu_interval_notifier *mn, + const struct mmu_notifier_range *range, + unsigned long cur_seq) { + struct gntdev_grant_map *map = + container_of(mn, struct gntdev_grant_map, notifier); unsigned long mstart, mend; int err; - if (!in_range(map, start, end)) - return 0; + if (!mmu_notifier_range_blockable(range)) + return false; - if (!blockable) - return -EAGAIN; + /* + * If the VMA is split or otherwise changed the notifier is not + * updated, but we don't want to process VA's outside the modified + * VMA. FIXME: It would be much more understandable to just prevent + * modifying the VMA in the first place. + */ + if (map->vma->vm_start >= range->end || + map->vma->vm_end <= range->start) + return true; - mstart = max(start, map->vma->vm_start); - mend = min(end, map->vma->vm_end); + mstart = max(range->start, map->vma->vm_start); + mend = min(range->end, map->vma->vm_end); pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n", map->index, map->count, map->vma->vm_start, map->vma->vm_end, - start, end, mstart, mend); + range->start, range->end, mstart, mend); err = unmap_grant_pages(map, (mstart - map->vma->vm_start) >> PAGE_SHIFT, (mend - mstart) >> PAGE_SHIFT); WARN_ON(err); - return 0; -} - -static int mn_invl_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); - struct gntdev_grant_map *map; - int ret = 0; - - if (mmu_notifier_range_blockable(range)) - mutex_lock(&priv->lock); - else if (!mutex_trylock(&priv->lock)) - return -EAGAIN; - - list_for_each_entry(map, &priv->maps, next) { - ret = unmap_if_in_range(map, range->start, range->end, - mmu_notifier_range_blockable(range)); - if (ret) - goto out_unlock; - } - list_for_each_entry(map, &priv->freeable_maps, next) { - ret = unmap_if_in_range(map, range->start, range->end, - mmu_notifier_range_blockable(range)); - if (ret) - goto out_unlock; - } - -out_unlock: - mutex_unlock(&priv->lock); - - return ret; -} - -static void mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); - struct gntdev_grant_map *map; - int err; - - mutex_lock(&priv->lock); - list_for_each_entry(map, &priv->maps, next) { - if (!map->vma) - continue; - pr_debug("map %d+%d (%lx %lx)\n", - map->index, map->count, - map->vma->vm_start, map->vma->vm_end); - err = unmap_grant_pages(map, /* offset */ 0, map->count); - WARN_ON(err); - } - list_for_each_entry(map, &priv->freeable_maps, next) { - if (!map->vma) - continue; - pr_debug("map %d+%d (%lx %lx)\n", - map->index, map->count, - map->vma->vm_start, map->vma->vm_end); - err = unmap_grant_pages(map, /* offset */ 0, map->count); - WARN_ON(err); - } - mutex_unlock(&priv->lock); + return true; } -static const struct mmu_notifier_ops gntdev_mmu_ops = { - .release = mn_release, - .invalidate_range_start = mn_invl_range_start, +static const struct mmu_interval_notifier_ops gntdev_mmu_ops = { + .invalidate = gntdev_invalidate, }; /* ------------------------------------------------------------------ */ @@ -592,7 +513,6 @@ static int gntdev_open(struct inode *inode, struct file *flip) return -ENOMEM; INIT_LIST_HEAD(&priv->maps); - INIT_LIST_HEAD(&priv->freeable_maps); mutex_init(&priv->lock); #ifdef CONFIG_XEN_GNTDEV_DMABUF @@ -604,17 +524,6 @@ static int gntdev_open(struct inode *inode, struct file *flip) } #endif - if (use_ptemod) { - priv->mm = get_task_mm(current); - if (!priv->mm) { - kfree(priv); - return -ENOMEM; - } - priv->mn.ops = &gntdev_mmu_ops; - ret = mmu_notifier_register(&priv->mn, priv->mm); - mmput(priv->mm); - } - if (ret) { kfree(priv); return ret; @@ -644,16 +553,12 @@ static int gntdev_release(struct inode *inode, struct file *flip) list_del(&map->next); gntdev_put_map(NULL /* already removed */, map); } - WARN_ON(!list_empty(&priv->freeable_maps)); mutex_unlock(&priv->lock); #ifdef CONFIG_XEN_GNTDEV_DMABUF gntdev_dmabuf_fini(priv->dmabuf_priv); #endif - if (use_ptemod) - mmu_notifier_unregister(&priv->mn, priv->mm); - kfree(priv); return 0; } @@ -714,8 +619,6 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); if (map) { list_del(&map->next); - if (populate_freeable_maps) - list_add_tail(&map->next, &priv->freeable_maps); err = 0; } mutex_unlock(&priv->lock); @@ -1087,11 +990,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; if (use_ptemod && map->vma) goto unlock_out; - if (use_ptemod && priv->mm != vma->vm_mm) { - pr_warn("Huh? Other mm?\n"); - goto unlock_out; - } - refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; @@ -1102,10 +1000,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) vma->vm_flags |= VM_DONTCOPY; vma->vm_private_data = map; - - if (use_ptemod) - map->vma = vma; - if (map->flags) { if ((vma->vm_flags & VM_WRITE) && (map->flags & GNTMAP_readonly)) @@ -1116,8 +1010,28 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->flags |= GNTMAP_readonly; } + if (use_ptemod) { + map->vma = vma; + err = mmu_interval_notifier_insert_locked( + &map->notifier, vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, &gntdev_mmu_ops); + if (err) + goto out_unlock_put; + } mutex_unlock(&priv->lock); + /* + * gntdev takes the address of the PTE in find_grant_ptes() and passes + * it to the hypervisor in gntdev_map_grant_pages(). The purpose of + * the notifier is to prevent the hypervisor pointer to the PTE from + * going stale. + * + * Since this vma's mappings can't be touched without the mmap_sem, + * and we are holding it now, there is no need for the notifier_range + * locking pattern. + */ + mmu_interval_read_begin(&map->notifier); + if (use_ptemod) { map->pages_vm_start = vma->vm_start; err = apply_to_page_range(vma->vm_mm, vma->vm_start, @@ -1166,8 +1080,11 @@ out_unlock_put: mutex_unlock(&priv->lock); out_put_map: if (use_ptemod) { - map->vma = NULL; unmap_grant_pages(map, 0, map->count); + if (map->vma) { + mmu_interval_notifier_remove(&map->notifier); + map->vma = NULL; + } } gntdev_put_map(priv, map); return err; -- cgit v1.2.3 From d41b26d81a83e04500e926fbab746ae87c20bb0e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 11 Nov 2019 12:20:09 +0000 Subject: xen/gntdev: remove redundant non-zero check on ret The non-zero check on ret is always going to be false because ret was initialized as zero and the only place it is set to non-zero contains a return path before the non-zero check. Hence the check is redundant and can be removed. [ jgross@suse.com: limit scope of ret ] Addresses-Coverity: ("Logically dead code") Signed-off-by: Colin Ian King Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/gntdev.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/xen/gntdev.c') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index a04ddf2a68af..3d40f8074dbb 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -506,7 +506,6 @@ static const struct mmu_interval_notifier_ops gntdev_mmu_ops = { static int gntdev_open(struct inode *inode, struct file *flip) { struct gntdev_priv *priv; - int ret = 0; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -518,16 +517,12 @@ static int gntdev_open(struct inode *inode, struct file *flip) #ifdef CONFIG_XEN_GNTDEV_DMABUF priv->dmabuf_priv = gntdev_dmabuf_init(flip); if (IS_ERR(priv->dmabuf_priv)) { - ret = PTR_ERR(priv->dmabuf_priv); - kfree(priv); - return ret; - } -#endif + int ret = PTR_ERR(priv->dmabuf_priv); - if (ret) { kfree(priv); return ret; } +#endif flip->private_data = priv; #ifdef CONFIG_XEN_GRANT_DMA_ALLOC -- cgit v1.2.3 From 3b06ac6707c196b5036fe013c460da86c9060085 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 7 Nov 2019 12:15:45 +0100 Subject: xen/gntdev: replace global limit of mapped pages by limit per call Today there is a global limit of pages mapped via /dev/xen/gntdev set to 1 million pages per default. There is no reason why that limit is existing, as total number of grant mappings is limited by the hypervisor anyway and preferring kernel mappings over userspace ones doesn't make sense. It should be noted that the gntdev device is usable by root only. Additionally checking of that limit is fragile, as the number of pages to map via one call is specified in a 32-bit unsigned variable which isn't tested to stay within reasonable limits (the only test is the value to be <= zero, which basically excludes only calls without any mapping requested). So trying to map e.g. 0xffff0000 pages while already nearly 1000000 pages are mapped will effectively lower the global number of mapped pages such that a parallel call mapping a reasonable amount of pages can succeed in spite of the global limit being violated. So drop the global limit and introduce per call limit instead. This per call limit (default: 65536 grant mappings) protects against allocating insane large arrays in the kernel for doing a hypercall which will fail anyway in case a user is e.g. trying to map billions of pages. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Andrushchenko Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/gntdev-common.h | 2 +- drivers/xen/gntdev-dmabuf.c | 11 +++-------- drivers/xen/gntdev.c | 24 +++++++----------------- 3 files changed, 11 insertions(+), 26 deletions(-) (limited to 'drivers/xen/gntdev.c') diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h index 91e44c04f787..9a3960ecff6c 100644 --- a/drivers/xen/gntdev-common.h +++ b/drivers/xen/gntdev-common.h @@ -81,7 +81,7 @@ void gntdev_add_map(struct gntdev_priv *priv, struct gntdev_grant_map *add); void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map); -bool gntdev_account_mapped_pages(int count); +bool gntdev_test_page_count(unsigned int count); int gntdev_map_grant_pages(struct gntdev_grant_map *map); diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c index 2c4f324f8626..63f0857bf62d 100644 --- a/drivers/xen/gntdev-dmabuf.c +++ b/drivers/xen/gntdev-dmabuf.c @@ -446,7 +446,7 @@ dmabuf_exp_alloc_backing_storage(struct gntdev_priv *priv, int dmabuf_flags, { struct gntdev_grant_map *map; - if (unlikely(count <= 0)) + if (unlikely(gntdev_test_page_count(count))) return ERR_PTR(-EINVAL); if ((dmabuf_flags & GNTDEV_DMA_FLAG_WC) && @@ -459,11 +459,6 @@ dmabuf_exp_alloc_backing_storage(struct gntdev_priv *priv, int dmabuf_flags, if (!map) return ERR_PTR(-ENOMEM); - if (unlikely(gntdev_account_mapped_pages(count))) { - pr_debug("can't map %d pages: over limit\n", count); - gntdev_put_map(NULL, map); - return ERR_PTR(-ENOMEM); - } return map; } @@ -771,7 +766,7 @@ long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, int use_ptemod, if (copy_from_user(&op, u, sizeof(op)) != 0) return -EFAULT; - if (unlikely(op.count <= 0)) + if (unlikely(gntdev_test_page_count(op.count))) return -EINVAL; refs = kcalloc(op.count, sizeof(*refs), GFP_KERNEL); @@ -818,7 +813,7 @@ long gntdev_ioctl_dmabuf_imp_to_refs(struct gntdev_priv *priv, if (copy_from_user(&op, u, sizeof(op)) != 0) return -EFAULT; - if (unlikely(op.count <= 0)) + if (unlikely(gntdev_test_page_count(op.count))) return -EINVAL; gntdev_dmabuf = dmabuf_imp_to_refs(priv->dmabuf_priv, diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 3d40f8074dbb..ad621ec1912c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -55,12 +55,10 @@ MODULE_AUTHOR("Derek G. Murray , " "Gerd Hoffmann "); MODULE_DESCRIPTION("User-space granted page access driver"); -static int limit = 1024*1024; -module_param(limit, int, 0644); -MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by " - "the gntdev device"); - -static atomic_t pages_mapped = ATOMIC_INIT(0); +static unsigned int limit = 64*1024; +module_param(limit, uint, 0644); +MODULE_PARM_DESC(limit, + "Maximum number of grants that may be mapped by one mapping request"); static int use_ptemod; @@ -71,9 +69,9 @@ static struct miscdevice gntdev_miscdev; /* ------------------------------------------------------------------ */ -bool gntdev_account_mapped_pages(int count) +bool gntdev_test_page_count(unsigned int count) { - return atomic_add_return(count, &pages_mapped) > limit; + return !count || count > limit; } static void gntdev_print_maps(struct gntdev_priv *priv, @@ -241,8 +239,6 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; - atomic_sub(map->count, &pages_mapped); - if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); @@ -568,7 +564,7 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, if (copy_from_user(&op, u, sizeof(op)) != 0) return -EFAULT; pr_debug("priv %p, add %d\n", priv, op.count); - if (unlikely(op.count <= 0)) + if (unlikely(gntdev_test_page_count(op.count))) return -EINVAL; err = -ENOMEM; @@ -576,12 +572,6 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, if (!map) return err; - if (unlikely(gntdev_account_mapped_pages(op.count))) { - pr_debug("can't map: over limit\n"); - gntdev_put_map(NULL, map); - return err; - } - if (copy_from_user(map->grants, &u->refs, sizeof(map->grants[0]) * op.count) != 0) { gntdev_put_map(NULL, map); -- cgit v1.2.3 From b3f7931f5c61ba39e81a5c958bf5d65ebb1838af Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 7 Nov 2019 12:15:46 +0100 Subject: xen/gntdev: switch from kcalloc() to kvcalloc() With sufficient many pages to map gntdev can reach order 9 allocation sizes. As there is no need to have physically contiguous buffers switch to kvcalloc() in order to avoid failing allocations. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Andrushchenko Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/gntdev.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'drivers/xen/gntdev.c') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index ad621ec1912c..4fc83e3f5ad3 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -112,14 +112,14 @@ static void gntdev_free_map(struct gntdev_grant_map *map) gnttab_free_pages(map->count, map->pages); #ifdef CONFIG_XEN_GRANT_DMA_ALLOC - kfree(map->frames); + kvfree(map->frames); #endif - kfree(map->pages); - kfree(map->grants); - kfree(map->map_ops); - kfree(map->unmap_ops); - kfree(map->kmap_ops); - kfree(map->kunmap_ops); + kvfree(map->pages); + kvfree(map->grants); + kvfree(map->map_ops); + kvfree(map->unmap_ops); + kvfree(map->kmap_ops); + kvfree(map->kunmap_ops); kfree(map); } @@ -133,12 +133,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, if (NULL == add) return NULL; - add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); - add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); - add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); - add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); - add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL); - add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); + add->grants = kvcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); + add->map_ops = kvcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); + add->unmap_ops = kvcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); + add->kmap_ops = kvcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); + add->kunmap_ops = kvcalloc(count, + sizeof(add->kunmap_ops[0]), GFP_KERNEL); + add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || @@ -157,8 +158,8 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count, if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) { struct gnttab_dma_alloc_args args; - add->frames = kcalloc(count, sizeof(add->frames[0]), - GFP_KERNEL); + add->frames = kvcalloc(count, sizeof(add->frames[0]), + GFP_KERNEL); if (!add->frames) goto err; -- cgit v1.2.3