From f20bef6a951b6ef619655ed846113f706d0824d7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 19 Aug 2019 14:17:03 +0300 Subject: RDMA/odp: Make the three ways to create a umem_odp clear The three paths to build the umem_odps are kind of muddled, they are: - As a normal ib_mr umem - As a child in an implicit ODP umem tree - As the root of an implicit ODP umem tree Only the first two are actually umem's, the last is an abuse. The implicit case can only be triggered by explicit driver request, it should never be co-mingled with the normal case. While we are here, make sensible function names and add some comments to make this clearer. Link: https://lore.kernel.org/r/20190819111710.18440-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem_odp.c | 80 +++++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx5/odp.c | 23 ++++++----- include/rdma/ib_umem_odp.h | 6 ++- 3 files changed, 89 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 1643ff374b58..198c0ce04998 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -46,6 +46,8 @@ #include #include +#include "uverbs.h" + static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp) { mutex_lock(&umem_odp->umem_mutex); @@ -344,8 +346,67 @@ out_page_list: return ret; } -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, - unsigned long addr, size_t size) +/** + * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem + * + * Implicit ODP umems do not have a VA range and do not have any page lists. + * They exist only to hold the per_mm reference to help the driver create + * children umems. + * + * @udata: udata from the syscall being used to create the umem + * @access: ib_reg_mr access flags + */ +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, + int access) +{ + struct ib_ucontext *context = + container_of(udata, struct uverbs_attr_bundle, driver_udata) + ->context; + struct ib_umem *umem; + struct ib_umem_odp *umem_odp; + int ret; + + if (access & IB_ACCESS_HUGETLB) + return ERR_PTR(-EINVAL); + + if (!context) + return ERR_PTR(-EIO); + if (WARN_ON_ONCE(!context->invalidate_range)) + return ERR_PTR(-EINVAL); + + umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL); + if (!umem_odp) + return ERR_PTR(-ENOMEM); + umem = &umem_odp->umem; + umem->context = context; + umem->writable = ib_access_writable(access); + umem->owning_mm = current->mm; + umem_odp->is_implicit_odp = 1; + umem_odp->page_shift = PAGE_SHIFT; + + ret = ib_init_umem_odp(umem_odp, NULL); + if (ret) { + kfree(umem_odp); + return ERR_PTR(ret); + } + + mmgrab(umem->owning_mm); + + return umem_odp; +} +EXPORT_SYMBOL(ib_umem_odp_alloc_implicit); + +/** + * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit + * parent ODP umem + * + * @root: The parent umem enclosing the child. This must be allocated using + * ib_alloc_implicit_odp_umem() + * @addr: The starting userspace VA + * @size: The length of the userspace VA + */ +struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root, + unsigned long addr, size_t size) { /* * Caller must ensure that root cannot be freed during the call to @@ -355,6 +416,9 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, struct ib_umem *umem; int ret; + if (WARN_ON(!root->is_implicit_odp)) + return ERR_PTR(-EINVAL); + odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); if (!odp_data) return ERR_PTR(-ENOMEM); @@ -376,8 +440,15 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root, return odp_data; } -EXPORT_SYMBOL(ib_alloc_odp_umem); +EXPORT_SYMBOL(ib_umem_odp_alloc_child); +/** + * ib_umem_odp_get - Complete ib_umem_get() + * + * @umem_odp: The partially configured umem from ib_umem_get() + * @addr: The starting userspace VA + * @access: ib_reg_mr access flags + */ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) { /* @@ -386,9 +457,6 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access) */ struct mm_struct *mm = umem_odp->umem.owning_mm; - if (umem_odp->umem.address == 0 && umem_odp->umem.length == 0) - umem_odp->is_implicit_odp = 1; - umem_odp->page_shift = PAGE_SHIFT; if (access & IB_ACCESS_HUGETLB) { struct vm_area_struct *vma; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 80c07d85b966..ad209ae44f05 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -384,7 +384,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, - struct ib_umem *umem, + struct ib_umem_odp *umem_odp, bool ksm, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -402,7 +402,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->dev = dev; mr->access_flags = access_flags; mr->mmkey.iova = 0; - mr->umem = umem; + mr->umem = &umem_odp->umem; if (ksm) { err = mlx5_ib_update_xlt(mr, 0, @@ -462,14 +462,13 @@ next_mr: if (nentries) nentries++; } else { - odp = ib_alloc_odp_umem(odp_mr, addr, - MLX5_IMR_MTT_SIZE); + odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE); if (IS_ERR(odp)) { mutex_unlock(&odp_mr->umem_mutex); return ERR_CAST(odp); } - mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0, + mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0, mr->access_flags); if (IS_ERR(mtt)) { mutex_unlock(&odp_mr->umem_mutex); @@ -519,19 +518,19 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags) { struct mlx5_ib_mr *imr; - struct ib_umem *umem; + struct ib_umem_odp *umem_odp; - umem = ib_umem_get(udata, 0, 0, access_flags, 0); - if (IS_ERR(umem)) - return ERR_CAST(umem); + umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); + if (IS_ERR(umem_odp)) + return ERR_CAST(umem_odp); - imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags); + imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags); if (IS_ERR(imr)) { - ib_umem_release(umem); + ib_umem_release(&umem_odp->umem); return ERR_CAST(imr); } - imr->umem = umem; + imr->umem = &umem_odp->umem; init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 14b38b4459c5..219fe7015e7d 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -140,8 +140,10 @@ struct ib_ucontext_per_mm { }; int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access); -struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem, - unsigned long addr, size_t size); +struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, + int access); +struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, + unsigned long addr, size_t size); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, -- cgit v1.2.3