diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5/mr.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 421 |
1 files changed, 18 insertions, 403 deletions
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 32ef67e9a6a7..1e7653c997b5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -44,13 +44,7 @@ #include <rdma/ib_verbs.h> #include "dm.h" #include "mlx5_ib.h" - -/* - * We can't use an array for xlt_emergency_page because dma_map_single doesn't - * work on kernel modules memory - */ -void *xlt_emergency_page; -static DEFINE_MUTEX(xlt_emergency_page_mutex); +#include "umr.h" enum { MAX_PENDING_REG_MR = 8, @@ -128,11 +122,6 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, static int mr_cache_max_order(struct mlx5_ib_dev *dev); static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); -static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) -{ - return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); -} - static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); @@ -600,7 +589,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr; /* Matches access in alloc_cache_mr() */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + if (!mlx5r_umr_can_reconfig(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); spin_lock_irq(&ent->lock); @@ -741,7 +730,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && - mlx5_ib_can_load_pas_with_umr(dev, 0)) + mlx5r_umr_can_load_pas(dev, 0)) ent->limit = dev->mdev->profile.mr_cache[i].limit; else ent->limit = 0; @@ -848,49 +837,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct mlx5_ib_umr_context *context = - container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); - - context->status = wc->status; - complete(&context->done); -} - -static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) -{ - context->cqe.done = mlx5_ib_umr_done; - context->status = -1; - init_completion(&context->done); -} - -static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, - struct mlx5_umr_wr *umrwr) -{ - struct umr_common *umrc = &dev->umrc; - const struct ib_send_wr *bad; - int err; - struct mlx5_ib_umr_context umr_context; - - mlx5_ib_init_umr_context(&umr_context); - umrwr->wr.wr_cqe = &umr_context.cqe; - - down(&umrc->sem); - err = ib_post_send(umrc->qp, &umrwr->wr, &bad); - if (err) { - mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); - } else { - wait_for_completion(&umr_context.done); - if (umr_context.status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed (%u)\n", - umr_context.status); - err = -EFAULT; - } - } - up(&umrc->sem); - return err; -} - static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, unsigned int order) { @@ -949,7 +895,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, * cache then synchronously create an uncached one. */ if (!ent || ent->limit == 0 || - !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + !mlx5r_umr_can_reconfig(dev, 0, access_flags)) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(pd, umem, iova, access_flags, page_size, false); mutex_unlock(&dev->slow_path_mutex); @@ -968,289 +914,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } -#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ - MLX5_UMR_MTT_ALIGNMENT) -#define MLX5_SPARE_UMR_CHUNK 0x10000 - -/* - * Allocate a temporary buffer to hold the per-page information to transfer to - * HW. For efficiency this should be as large as it can be, but buffer - * allocation failure is not allowed, so try smaller sizes. - */ -static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) -{ - const size_t xlt_chunk_align = - MLX5_UMR_MTT_ALIGNMENT / ent_size; - size_t size; - void *res = NULL; - - static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); - - /* - * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the - * allocation can't trigger any kind of reclaim. - */ - might_sleep(); - - gfp_mask |= __GFP_ZERO | __GFP_NORETRY; - - /* - * If the system already has a suitable high order page then just use - * that, but don't try hard to create one. This max is about 1M, so a - * free x86 huge page will satisfy it. - */ - size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), - MLX5_MAX_UMR_CHUNK); - *nents = size / ent_size; - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, - get_order(size)); - if (res) - return res; - - if (size > MLX5_SPARE_UMR_CHUNK) { - size = MLX5_SPARE_UMR_CHUNK; - *nents = size / ent_size; - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, - get_order(size)); - if (res) - return res; - } - - *nents = PAGE_SIZE / ent_size; - res = (void *)__get_free_page(gfp_mask); - if (res) - return res; - - mutex_lock(&xlt_emergency_page_mutex); - memset(xlt_emergency_page, 0, PAGE_SIZE); - return xlt_emergency_page; -} - -static void mlx5_ib_free_xlt(void *xlt, size_t length) -{ - if (xlt == xlt_emergency_page) { - mutex_unlock(&xlt_emergency_page_mutex); - return; - } - - free_pages((unsigned long)xlt, get_order(length)); -} - -/* - * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for - * submission. - */ -static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, - struct mlx5_umr_wr *wr, struct ib_sge *sg, - size_t nents, size_t ent_size, - unsigned int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - dma_addr_t dma; - void *xlt; - - xlt = mlx5_ib_alloc_xlt(&nents, ent_size, - flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : - GFP_KERNEL); - sg->length = nents * ent_size; - dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - mlx5_ib_free_xlt(xlt, sg->length); - return NULL; - } - sg->addr = dma; - sg->lkey = dev->umrc.pd->local_dma_lkey; - - memset(wr, 0, sizeof(*wr)); - wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr->wr.sg_list = sg; - wr->wr.num_sge = 1; - wr->wr.opcode = MLX5_IB_WR_UMR; - wr->pd = mr->ibmr.pd; - wr->mkey = mr->mmkey.key; - wr->length = mr->ibmr.length; - wr->virt_addr = mr->ibmr.iova; - wr->access_flags = mr->access_flags; - wr->page_shift = mr->page_shift; - wr->xlt_size = sg->length; - return xlt; -} - -static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, - struct ib_sge *sg) -{ - struct device *ddev = &dev->mdev->pdev->dev; - - dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); - mlx5_ib_free_xlt(xlt, sg->length); -} - -static unsigned int xlt_wr_final_send_flags(unsigned int flags) -{ - unsigned int res = 0; - - if (flags & MLX5_IB_UPD_XLT_ENABLE) - res |= MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) - res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - return res; -} - -int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, - int page_shift, int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - void *xlt; - struct mlx5_umr_wr wr; - struct ib_sge sg; - int err = 0; - int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) - ? sizeof(struct mlx5_klm) - : sizeof(struct mlx5_mtt); - const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; - const int page_mask = page_align - 1; - size_t pages_mapped = 0; - size_t pages_to_map = 0; - size_t pages_iter; - size_t size_to_map = 0; - size_t orig_sg_length; - - if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && - !umr_can_use_indirect_mkey(dev)) - return -EPERM; - - if (WARN_ON(!mr->umem->is_odp)) - return -EINVAL; - - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, - * so we need to align the offset and length accordingly - */ - if (idx & page_mask) { - npages += idx & page_mask; - idx &= ~page_mask; - } - pages_to_map = ALIGN(npages, page_align); - - xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); - if (!xlt) - return -ENOMEM; - pages_iter = sg.length / desc_size; - orig_sg_length = sg.length; - - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } - - wr.page_shift = page_shift; - - for (pages_mapped = 0; - pages_mapped < pages_to_map && !err; - pages_mapped += pages_iter, idx += pages_iter) { - npages = min_t(int, pages_iter, pages_to_map - pages_mapped); - size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - dma_sync_single_for_device(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - - sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - - if (pages_mapped + pages_iter >= pages_to_map) - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); - - wr.offset = idx * desc_size; - wr.xlt_size = sg.length; - - err = mlx5_ib_post_send_wait(dev, &wr); - } - sg.length = orig_sg_length; - mlx5_ib_unmap_free_xlt(dev, xlt, &sg); - return err; -} - -/* - * Send the DMA list to the HW for a normal MR using UMR. - * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP - * flag may be used. - */ -int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - struct ib_block_iter biter; - struct mlx5_mtt *cur_mtt; - struct mlx5_umr_wr wr; - size_t orig_sg_length; - struct mlx5_mtt *mtt; - size_t final_size; - struct ib_sge sg; - int err = 0; - - if (WARN_ON(mr->umem->is_odp)) - return -EINVAL; - - mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, - ib_umem_num_dma_blocks(mr->umem, - 1 << mr->page_shift), - sizeof(*mtt), flags); - if (!mtt) - return -ENOMEM; - orig_sg_length = sg.length; - - cur_mtt = mtt; - rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter, - mr->umem->sgt_append.sgt.nents, - BIT(mr->page_shift)) { - if (cur_mtt == (void *)mtt + sg.length) { - dma_sync_single_for_device(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - err = mlx5_ib_post_send_wait(dev, &wr); - if (err) - goto err; - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - wr.offset += sg.length; - cur_mtt = mtt; - } - - cur_mtt->ptag = - cpu_to_be64(rdma_block_iter_dma_address(&biter) | - MLX5_IB_MTT_PRESENT); - - if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) - cur_mtt->ptag = 0; - - cur_mtt++; - } - - final_size = (void *)cur_mtt - (void *)mtt; - sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); - memset(cur_mtt, 0, sg.length - final_size); - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); - wr.xlt_size = sg.length; - - dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); - err = mlx5_ib_post_send_wait(dev, &wr); - -err: - sg.length = orig_sg_length; - mlx5_ib_unmap_free_xlt(dev, mtt, &sg); - return err; -} - /* * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. @@ -1441,7 +1104,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, bool xlt_with_umr; int err; - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); + xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); if (xlt_with_umr) { mr = alloc_cacheable_mr(pd, umem, iova, access_flags); } else { @@ -1467,7 +1130,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); + err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { mlx5_ib_dereg_mr(&mr->ibmr, NULL); return ERR_PTR(err); @@ -1504,7 +1167,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } /* ODP requires xlt update via umr to work. */ - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + if (!mlx5r_umr_can_load_pas(dev, length)) return ERR_PTR(-EINVAL); odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, @@ -1566,7 +1229,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) if (!umem_dmabuf->sgt) return; - mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); ib_umem_dmabuf_unmap_pages(umem_dmabuf); } @@ -1594,7 +1257,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, offset, virt_addr, length, fd, access_flags); /* dmabuf requires xlt update via umr to work. */ - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + if (!mlx5r_umr_can_load_pas(dev, length)) return ERR_PTR(-EINVAL); umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, @@ -1631,31 +1294,6 @@ err_dereg_mr: return ERR_PTR(err); } -/** - * revoke_mr - Fence all DMA on the MR - * @mr: The MR to fence - * - * Upon return the NIC will not be doing any DMA to the pages under the MR, - * and any DMA in progress will be completed. Failure of this function - * indicates the HW has failed catastrophically. - */ -static int revoke_mr(struct mlx5_ib_mr *mr) -{ - struct mlx5_umr_wr umrwr = {}; - - if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - return 0; - - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr_to_mdev(mr)->umrc.pd; - umrwr.mkey = mr->mmkey.key; - umrwr.ignore_free_state = 1; - - return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); -} - /* * True if the change in access flags can be done via UMR, only some access * flags can be updated. @@ -1669,32 +1307,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) return false; - return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, - target_access_flags); -} - -static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, - int access_flags) -{ - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_umr_wr umrwr = { - .wr = { - .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, - .opcode = MLX5_IB_WR_UMR, - }, - .mkey = mr->mmkey.key, - .pd = pd, - .access_flags = access_flags, - }; - int err; - - err = mlx5_ib_post_send_wait(dev, &umrwr); - if (err) - return err; - - mr->access_flags = access_flags; - return 0; + return mlx5r_umr_can_reconfig(dev, current_access_flags, + target_access_flags); } static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, @@ -1707,7 +1321,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, /* We only track the allocated sizes of MRs from the cache */ if (!mr->cache_ent) return false; - if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) return false; *page_size = @@ -1732,7 +1346,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, * with it. This ensure the change is atomic relative to any use of the * MR. */ - err = revoke_mr(mr); + err = mlx5r_umr_revoke_mr(mr); if (err) return err; @@ -1750,7 +1364,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, mr->ibmr.length = new_umem->length; mr->page_shift = order_base_2(page_size); mr->umem = new_umem; - err = mlx5_ib_update_mr_pas(mr, upd_flags); + err = mlx5r_umr_update_mr_pas(mr, upd_flags); if (err) { /* * The MR is revoked at this point so there is no issue to free @@ -1797,7 +1411,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* Fast path for PD/access change */ if (can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { - err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + err = mlx5r_umr_rereg_pd_access(mr, new_pd, + new_access_flags); if (err) return ERR_PTR(err); return NULL; @@ -1810,7 +1425,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * Only one active MR can refer to a umem at one time, revoke * the old MR before assigning the umem to the new one. */ - err = revoke_mr(mr); + err = mlx5r_umr_revoke_mr(mr); if (err) return ERR_PTR(err); umem = mr->umem; @@ -1955,7 +1570,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) /* Stop DMA */ if (mr->cache_ent) { - if (revoke_mr(mr)) { + if (mlx5r_umr_revoke_mr(mr)) { spin_lock_irq(&mr->cache_ent->lock); mr->cache_ent->total_mrs--; spin_unlock_irq(&mr->cache_ent->lock); |