summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx5/mr.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5/mr.c')
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c230
1 files changed, 97 insertions, 133 deletions
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1eff031ef048..ea8bfc3e2d8d 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -50,7 +50,6 @@ enum {
static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
@@ -59,13 +58,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- /* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
-
- return err;
+ return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -84,32 +79,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}
-static void update_odp_mr(struct mlx5_ib_mr *mr)
-{
- if (is_odp_mr(mr)) {
- /*
- * This barrier prevents the compiler from moving the
- * setting of umem->odp_data->private to point to our
- * MR, before reg_umr finished, to ensure that the MR
- * initialization have finished before starting to
- * handle invalidations.
- */
- smp_wmb();
- to_ib_umem_odp(mr->umem)->private = mr;
- /*
- * Make sure we will see the new
- * umem->odp_data->private value in the invalidation
- * routines, before we can get page faults on the
- * MR. Page faults can happen once we put the MR in
- * the tree, below this line. Without the barrier,
- * there can be a fault handling and an invalidation
- * before umem->odp_data->private == mr is visible to
- * the invalidation handler.
- */
- smp_wmb();
- }
-}
-
static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
@@ -120,8 +89,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
struct mlx5_cache_ent *ent = &cache->ent[c];
u8 key;
unsigned long flags;
- struct xarray *mkeys = &dev->mdev->priv.mkey_table;
- int err;
spin_lock_irqsave(&ent->lock, flags);
ent->pending--;
@@ -148,13 +115,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
ent->size++;
spin_unlock_irqrestore(&ent->lock, flags);
- xa_lock_irqsave(mkeys, flags);
- err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_ATOMIC));
- xa_unlock_irqrestore(mkeys, flags);
- if (err)
- pr_err("Error inserting to mkey tree. 0x%x\n", -err);
-
if (!completion_done(&ent->compl))
complete(&ent->compl);
}
@@ -244,9 +204,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- synchronize_srcu(&dev->mr_srcu);
-
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
kfree(mr);
@@ -454,7 +411,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
- return NULL;
+ return ERR_PTR(-EINVAL);
}
ent = &cache->ent[entry];
@@ -537,7 +494,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
c = order2idx(dev, mr->order);
WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
- if (unreg_umr(dev, mr)) {
+ if (mlx5_mr_cache_invalidate(mr)) {
mr->allocated_from_cache = false;
destroy_mkey(dev, mr);
ent = &cache->ent[c];
@@ -581,10 +538,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
-#endif
-
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
list_del(&mr->list);
kfree(mr);
@@ -705,6 +658,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
return 0;
}
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+ struct ib_pd *pd)
+{
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
+}
+
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -728,16 +695,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
MLX5_SET(mkc, mkc, length64, 1);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, 0);
+ set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
if (err)
@@ -790,7 +749,8 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
if (access_flags & IB_ACCESS_ON_DEMAND) {
struct ib_umem_odp *odp;
- odp = ib_umem_odp_get(udata, start, length, access_flags);
+ odp = ib_umem_odp_get(udata, start, length, access_flags,
+ &mlx5_mn_ops);
if (IS_ERR(odp)) {
mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
PTR_ERR(odp));
@@ -805,7 +765,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
if (order)
*order = ilog2(roundup_pow_of_two(*ncont));
} else {
- u = ib_umem_get(udata, start, length, access_flags, 0);
+ u = ib_umem_get(udata, start, length, access_flags);
if (IS_ERR(u)) {
mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
return PTR_ERR(u);
@@ -1195,16 +1155,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
MLX5_SET64(mkc, mkc, len, length);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, start_addr);
+ set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
if (err)
@@ -1346,8 +1298,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->umem = umem;
set_mr_fields(dev, mr, npages, length, access_flags);
- update_odp_mr(mr);
-
if (use_umr) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
@@ -1363,9 +1313,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- mr->live = 1;
- atomic_set(&mr->num_pending_prefetch, 0);
+ if (is_odp_mr(mr)) {
+ to_ib_umem_odp(mr->umem)->private = mr;
+ atomic_set(&mr->num_deferred_work, 0);
+ err = xa_err(xa_store(&dev->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
+ GFP_KERNEL));
+ if (err) {
+ dereg_mr(dev, mr);
+ return ERR_PTR(err);
+ }
}
return &mr->ibmr;
@@ -1374,22 +1331,29 @@ error:
return ERR_PTR(err);
}
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+/**
+ * mlx5_mr_cache_invalidate - Fence all DMA on the MR
+ * @mr: The MR to fence
+ *
+ * Upon return the NIC will not be doing any DMA to the pages under the MR,
+ * and any DMA inprogress will be completed. Failure of this function
+ * indicates the HW has failed catastrophically.
+ */
+int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
{
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_umr_wr umrwr = {};
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.pd = dev->umrc.pd;
+ umrwr.pd = mr->dev->umrc.pd;
umrwr.mkey = mr->mmkey.key;
umrwr.ignore_free_state = 1;
- return mlx5_ib_post_send_wait(dev, &umrwr);
+ return mlx5_ib_post_send_wait(mr->dev, &umrwr);
}
static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
@@ -1441,6 +1405,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (!mr->umem)
return -EINVAL;
+ if (is_odp_mr(mr))
+ return -EOPNOTSUPP;
+
if (flags & IB_MR_REREG_TRANS) {
addr = virt_addr;
len = length;
@@ -1470,7 +1437,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
* UMR can't be used - MKey needs to be replaced.
*/
if (mr->allocated_from_cache)
- err = unreg_umr(dev, mr);
+ err = mlx5_mr_cache_invalidate(mr);
else
err = destroy_mkey(dev, mr);
if (err)
@@ -1486,8 +1453,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
mr->allocated_from_cache = 0;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- mr->live = 1;
} else {
/*
* Send a UMR WQE
@@ -1516,7 +1481,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
set_mr_fields(dev, mr, npages, len, access_flags);
- update_odp_mr(mr);
return 0;
err:
@@ -1586,6 +1550,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
+ xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
kfree(mr->sig);
mr->sig = NULL;
}
@@ -1601,53 +1566,20 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
- if (is_odp_mr(mr)) {
- struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
-
- /* Prevent new page faults and
- * prefetch requests from succeeding
- */
- mr->live = 0;
-
- /* dequeue pending prefetch requests for the mr */
- if (atomic_read(&mr->num_pending_prefetch))
- flush_workqueue(system_unbound_wq);
- WARN_ON(atomic_read(&mr->num_pending_prefetch));
-
- /* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
- /* Destroy all page mappings */
- if (!umem_odp->is_implicit_odp)
- mlx5_ib_invalidate_range(umem_odp,
- ib_umem_start(umem_odp),
- ib_umem_end(umem_odp));
- else
- mlx5_ib_free_implicit_mr(mr);
- /*
- * We kill the umem before the MR for ODP,
- * so that there will not be any invalidations in
- * flight, looking at the *mr struct.
- */
- ib_umem_odp_release(umem_odp);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
-
- /* Avoid double-freeing the umem. */
- umem = NULL;
- }
+ /* Stop all DMA */
+ if (is_odp_mr(mr))
+ mlx5_ib_fence_odp_mr(mr);
+ else
+ clean_mr(dev, mr);
- clean_mr(dev, mr);
+ if (mr->allocated_from_cache)
+ mlx5_mr_cache_free(dev, mr);
+ else
+ kfree(mr);
- /*
- * We should unregister the DMA address from the HCA before
- * remove the DMA mapping.
- */
- mlx5_mr_cache_free(dev, mr);
ib_umem_release(umem);
- if (umem)
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
+ atomic_sub(npages, &dev->mdev->priv.reg_pages);
- if (!mr->allocated_from_cache)
- kfree(mr);
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
@@ -1659,6 +1591,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
}
+ if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
+ mlx5_ib_free_implicit_mr(mmr);
+ return 0;
+ }
+
dereg_mr(to_mdev(ibmr->device), mmr);
return 0;
@@ -1822,8 +1759,15 @@ static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
if (err)
goto err_free_mtt_mr;
+ err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+ mr->sig, GFP_KERNEL));
+ if (err)
+ goto err_free_descs;
return 0;
+err_free_descs:
+ destroy_mkey(dev, mr);
+ mlx5_free_priv_descs(mr);
err_free_mtt_mr:
dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
mr->mtt_mr = NULL;
@@ -1976,9 +1920,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
}
}
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ err = xa_err(xa_store(&dev->odp_mkeys,
+ mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
+ GFP_KERNEL));
+ if (err)
+ goto free_mkey;
+ }
+
kfree(in);
return &mw->ibmw;
+free_mkey:
+ mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
free:
kfree(mw);
kfree(in);
@@ -1987,14 +1941,24 @@ free:
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
+ struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
int err;
- err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
- &mmw->mmkey);
- if (!err)
- kfree(mmw);
- return err;
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
+ /*
+ * pagefault_single_data_segment() may be accessing mmw under
+ * SRCU if the user bound an ODP MR to this MW.
+ */
+ synchronize_srcu(&dev->odp_srcu);
+ }
+
+ err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
+ if (err)
+ return err;
+ kfree(mmw);
+ return 0;
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,