summaryrefslogtreecommitdiff
path: root/include/rdma
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@mellanox.com>2019-11-12 23:22:22 +0300
committerJason Gunthorpe <jgg@mellanox.com>2019-11-24 02:56:44 +0300
commitf25a546e65292b36f15cca0912450c4944fae031 (patch)
tree045faece87dbe0b3294db953432c84221a620b50 /include/rdma
parent107e899874e95dcddc779142942bf285eba38bc5 (diff)
downloadlinux-f25a546e65292b36f15cca0912450c4944fae031.tar.xz
RDMA/odp: Use mmu_interval_notifier_insert()
Replace the internal interval tree based mmu notifier with the new common mmu_interval_notifier_insert() API. This removes a lot of code and fixes a deadlock that can be triggered in ODP: zap_page_range() mmu_notifier_invalidate_range_start() [..] ib_umem_notifier_invalidate_range_start() down_read(&per_mm->umem_rwsem) unmap_single_vma() [..] __split_huge_page_pmd() mmu_notifier_invalidate_range_start() [..] ib_umem_notifier_invalidate_range_start() down_read(&per_mm->umem_rwsem) // DEADLOCK mmu_notifier_invalidate_range_end() up_read(&per_mm->umem_rwsem) mmu_notifier_invalidate_range_end() up_read(&per_mm->umem_rwsem) The umem_rwsem is held across the range_start/end as the ODP algorithm for invalidate_range_end cannot tolerate changes to the interval tree. However, due to the nested invalidation regions the second down_read() can deadlock if there are competing writers. The new core code provides an alternative scheme to solve this problem. Fixes: ca748c39ea3f ("RDMA/umem: Get rid of per_mm->notifier_count") Link: https://lore.kernel.org/r/20191112202231.3856-6-jgg@ziepe.ca Tested-by: Artemy Kovalyov <artemyko@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'include/rdma')
-rw-r--r--include/rdma/ib_umem_odp.h68
-rw-r--r--include/rdma/ib_verbs.h2
2 files changed, 14 insertions, 56 deletions
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 09b0e4494986..81429acc8257 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -35,11 +35,11 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>
-#include <linux/interval_tree.h>
struct ib_umem_odp {
struct ib_umem umem;
- struct ib_ucontext_per_mm *per_mm;
+ struct mmu_interval_notifier notifier;
+ struct pid *tgid;
/*
* An array of the pages included in the on-demand paging umem.
@@ -62,13 +62,8 @@ struct ib_umem_odp {
struct mutex umem_mutex;
void *private; /* for the HW driver to use. */
- int notifiers_seq;
- int notifiers_count;
int npages;
- /* Tree tracking */
- struct interval_tree_node interval_tree;
-
/*
* An implicit odp umem cannot be DMA mapped, has 0 length, and serves
* only as an anchor for the driver to hold onto the per_mm. FIXME:
@@ -77,7 +72,6 @@ struct ib_umem_odp {
*/
bool is_implicit_odp;
- struct completion notifier_completion;
unsigned int page_shift;
};
@@ -89,13 +83,13 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
/* Returns the first page of an ODP umem. */
static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
{
- return umem_odp->interval_tree.start;
+ return umem_odp->notifier.interval_tree.start;
}
/* Returns the address of the page after the last one of an ODP umem. */
static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
{
- return umem_odp->interval_tree.last + 1;
+ return umem_odp->notifier.interval_tree.last + 1;
}
static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
@@ -119,21 +113,15 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-struct ib_ucontext_per_mm {
- struct mmu_notifier mn;
- struct pid *tgid;
-
- struct rb_root_cached umem_tree;
- /* Protects umem_tree */
- struct rw_semaphore umem_rwsem;
-};
-
-struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
- size_t size, int access);
+struct ib_umem_odp *
+ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, size_t size,
+ int access, const struct mmu_interval_notifier_ops *ops);
struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
int access);
-struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
- unsigned long addr, size_t size);
+struct ib_umem_odp *
+ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr,
+ size_t size,
+ const struct mmu_interval_notifier_ops *ops);
void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
@@ -143,39 +131,11 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
-typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
- void *cookie);
-/*
- * Call the callback on each ib_umem in the range. Returns the logical or of
- * the return values of the functions called.
- */
-int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
- u64 start, u64 end,
- umem_call_back cb,
- bool blockable, void *cookie);
-
-static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
- unsigned long mmu_seq)
-{
- /*
- * This code is strongly based on the KVM code from
- * mmu_notifier_retry. Should be called with
- * the relevant locks taken (umem_odp->umem_mutex
- * and the ucontext umem_mutex semaphore locked for read).
- */
-
- if (unlikely(umem_odp->notifiers_count))
- return 1;
- if (umem_odp->notifiers_seq != mmu_seq)
- return 1;
- return 0;
-}
-
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata,
- unsigned long addr,
- size_t size, int access)
+static inline struct ib_umem_odp *
+ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, size_t size,
+ int access, const struct mmu_interval_notifier_ops *ops)
{
return ERR_PTR(-EINVAL);
}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6a47ba85c54c..2c30c859ae0d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2422,8 +2422,6 @@ struct ib_device_ops {
u64 iova);
int (*unmap_fmr)(struct list_head *fmr_list);
int (*dealloc_fmr)(struct ib_fmr *fmr);
- void (*invalidate_range)(struct ib_umem_odp *umem_odp,
- unsigned long start, unsigned long end);
int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,