// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ #include #include "mlx5_ib.h" #include "umr.h" #include "wr.h" /* * We can't use an array for xlt_emergency_page because dma_map_single doesn't * work on kernel modules memory */ void *xlt_emergency_page; static DEFINE_MUTEX(xlt_emergency_page_mutex); static __be64 get_umr_enable_mr_mask(void) { u64 result; result = MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 get_umr_disable_mr_mask(void) { u64 result; result = MLX5_MKEY_MASK_FREE; return cpu_to_be64(result); } static __be64 get_umr_update_translation_mask(void) { u64 result; result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR; return cpu_to_be64(result); } static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev) { u64 result; result = MLX5_MKEY_MASK_LR | MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW; if (MLX5_CAP_GEN(dev->mdev, atomic)) result |= MLX5_MKEY_MASK_A; if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; return cpu_to_be64(result); } static __be64 get_umr_update_pd_mask(void) { u64 result; result = MLX5_MKEY_MASK_PD; return cpu_to_be64(result); } static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) { if (mask & MLX5_MKEY_MASK_PAGE_SIZE && MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return -EPERM; if (mask & MLX5_MKEY_MASK_A && MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) return -EPERM; if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) return -EPERM; if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return -EPERM; return 0; } enum { MAX_UMR_WR = 128, }; static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp) { struct ib_qp_attr attr = {}; int ret; attr.qp_state = IB_QPS_INIT; attr.port_num = 1; ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); if (ret) { mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); return ret; } memset(&attr, 0, sizeof(attr)); attr.qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); return ret; } memset(&attr, 0, sizeof(attr)); attr.qp_state = IB_QPS_RTS; ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); return ret; } return 0; } int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) { struct ib_qp_init_attr init_attr = {}; struct ib_cq *cq; struct ib_qp *qp; int ret = 0; /* * UMR qp is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ if (dev->umrc.qp) return 0; mutex_lock(&dev->umrc.init_lock); /* First user allocates the UMR resources. Skip if already allocated. */ if (dev->umrc.qp) goto unlock; cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); if (IS_ERR(cq)) { mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); ret = PTR_ERR(cq); goto unlock; } init_attr.send_cq = cq; init_attr.recv_cq = cq; init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; init_attr.cap.max_send_wr = MAX_UMR_WR; init_attr.cap.max_send_sge = 1; init_attr.qp_type = MLX5_IB_QPT_REG_UMR; init_attr.port_num = 1; qp = ib_create_qp(dev->umrc.pd, &init_attr); if (IS_ERR(qp)) { mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); ret = PTR_ERR(qp); goto destroy_cq; } ret = mlx5r_umr_qp_rst2rts(dev, qp); if (ret) goto destroy_qp; dev->umrc.cq = cq; sema_init(&dev->umrc.sem, MAX_UMR_WR); mutex_init(&dev->umrc.lock); dev->umrc.state = MLX5_UMR_STATE_ACTIVE; dev->umrc.qp = qp; mutex_unlock(&dev->umrc.init_lock); return 0; destroy_qp: ib_destroy_qp(qp); destroy_cq: ib_free_cq(cq); unlock: mutex_unlock(&dev->umrc.init_lock); return ret; } void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) { if (dev->umrc.state == MLX5_UMR_STATE_UNINIT) return; mutex_destroy(&dev->umrc.lock); /* After device init, UMR cp/qp are not unset during the lifetime. */ ib_destroy_qp(dev->umrc.qp); ib_free_cq(dev->umrc.cq); } int mlx5r_umr_init(struct mlx5_ib_dev *dev) { struct ib_pd *pd; pd = ib_alloc_pd(&dev->ib_dev, 0); if (IS_ERR(pd)) { mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); return PTR_ERR(pd); } dev->umrc.pd = pd; mutex_init(&dev->umrc.init_lock); return 0; } void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev) { if (!dev->umrc.pd) return; mutex_destroy(&dev->umrc.init_lock); ib_dealloc_pd(dev->umrc.pd); } static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) { struct umr_common *umrc = &dev->umrc; struct ib_qp_attr attr; int err; attr.qp_state = IB_QPS_RESET; err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); if (err) { mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); goto err; } err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); if (err) goto err; umrc->state = MLX5_UMR_STATE_ACTIVE; return 0; err: umrc->state = MLX5_UMR_STATE_ERR; return err; } static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, struct mlx5r_umr_wqe *wqe, bool with_data) { unsigned int wqe_size = with_data ? sizeof(struct mlx5r_umr_wqe) : sizeof(struct mlx5r_umr_wqe) - sizeof(struct mlx5_wqe_data_seg); struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_ctrl_seg *ctrl; union { struct ib_cqe *ib_cqe; u64 wr_id; } id; void *cur_edge, *seg; unsigned long flags; unsigned int idx; int size, err; if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) return -EIO; spin_lock_irqsave(&qp->sq.lock, flags); err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0, cpu_to_be32(mkey), false, false); if (WARN_ON(err)) goto out; qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size); id.ib_cqe = cqe; mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0, MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR); mlx5r_ring_db(qp, 1, ctrl); out: spin_unlock_irqrestore(&qp->sq.lock, flags); return err; } static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); context->status = wc->status; complete(&context->done); } static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context) { context->cqe.done = mlx5r_umr_done; init_completion(&context->done); } static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, struct mlx5r_umr_wqe *wqe, bool with_data) { struct umr_common *umrc = &dev->umrc; struct mlx5r_umr_context umr_context; int err; err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask)); if (WARN_ON(err)) return err; mlx5r_umr_init_context(&umr_context); down(&umrc->sem); while (true) { mutex_lock(&umrc->lock); if (umrc->state == MLX5_UMR_STATE_ERR) { mutex_unlock(&umrc->lock); err = -EFAULT; break; } if (umrc->state == MLX5_UMR_STATE_RECOVER) { mutex_unlock(&umrc->lock); usleep_range(3000, 5000); continue; } err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, with_data); mutex_unlock(&umrc->lock); if (err) { mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); break; } wait_for_completion(&umr_context.done); if (umr_context.status == IB_WC_SUCCESS) break; if (umr_context.status == IB_WC_WR_FLUSH_ERR) continue; WARN_ON_ONCE(1); mlx5_ib_warn(dev, "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n", umr_context.status, mkey); mutex_lock(&umrc->lock); err = mlx5r_umr_recover(dev); mutex_unlock(&umrc->lock); if (err) mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", err); err = -EFAULT; break; } up(&umrc->sem); return err; } /** * mlx5r_umr_revoke_mr - Fence all DMA on the MR * @mr: The MR to fence * * Upon return the NIC will not be doing any DMA to the pages under the MR, * and any DMA in progress will be completed. Failure of this function * indicates the HW has failed catastrophically. */ int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct mlx5r_umr_wqe wqe = {}; if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask(); wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; MLX5_SET(mkc, &wqe.mkey_seg, free, 1); MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn); MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key)); return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); } static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, struct mlx5_mkey_seg *seg, unsigned int access_flags) { bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) && (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || pcie_relaxed_ordering_enabled(dev->mdev->pdev)); MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, seg, lr, 1); MLX5_SET(mkc, seg, relaxed_ordering_write, !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read); } int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, int access_flags) { struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct mlx5r_umr_wqe wqe = {}; int err; wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev); wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE; wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags); MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key)); err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); if (err) return err; mr->access_flags = access_flags; return 0; } #define MLX5_MAX_UMR_CHUNK \ ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT) #define MLX5_SPARE_UMR_CHUNK 0x10000 /* * Allocate a temporary buffer to hold the per-page information to transfer to * HW. For efficiency this should be as large as it can be, but buffer * allocation failure is not allowed, so try smaller sizes. */ static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) { const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size; size_t size; void *res = NULL; static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0); /* * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the * allocation can't trigger any kind of reclaim. */ might_sleep(); gfp_mask |= __GFP_ZERO | __GFP_NORETRY; /* * If the system already has a suitable high order page then just use * that, but don't try hard to create one. This max is about 1M, so a * free x86 huge page will satisfy it. */ size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), MLX5_MAX_UMR_CHUNK); *nents = size / ent_size; res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, get_order(size)); if (res) return res; if (size > MLX5_SPARE_UMR_CHUNK) { size = MLX5_SPARE_UMR_CHUNK; *nents = size / ent_size; res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, get_order(size)); if (res) return res; } *nents = PAGE_SIZE / ent_size; res = (void *)__get_free_page(gfp_mask); if (res) return res; mutex_lock(&xlt_emergency_page_mutex); memset(xlt_emergency_page, 0, PAGE_SIZE); return xlt_emergency_page; } static void mlx5r_umr_free_xlt(void *xlt, size_t length) { if (xlt == xlt_emergency_page) { mutex_unlock(&xlt_emergency_page_mutex); return; } free_pages((unsigned long)xlt, get_order(length)); } static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, struct ib_sge *sg) { struct device *ddev = &dev->mdev->pdev->dev; dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); mlx5r_umr_free_xlt(xlt, sg->length); } /* * Create an XLT buffer ready for submission. */ static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg, size_t nents, size_t ent_size, unsigned int flags) { struct device *ddev = &dev->mdev->pdev->dev; dma_addr_t dma; void *xlt; xlt = mlx5r_umr_alloc_xlt(&nents, ent_size, flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL); sg->length = nents * ent_size; dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); if (dma_mapping_error(ddev, dma)) { mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); mlx5r_umr_free_xlt(xlt, sg->length); return NULL; } sg->addr = dma; sg->lkey = dev->umrc.pd->local_dma_lkey; return xlt; } static void mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, unsigned int flags, struct ib_sge *sg) { if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) /* fail if free */ ctrl_seg->flags = MLX5_UMR_CHECK_FREE; else /* fail if not free */ ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE; ctrl_seg->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); } static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev, struct mlx5_mkey_seg *mkey_seg, struct mlx5_ib_mr *mr, unsigned int page_shift) { mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags); MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn); MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova); MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length); MLX5_SET(mkc, mkey_seg, log_page_size, page_shift); MLX5_SET(mkc, mkey_seg, qpn, 0xffffff); MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key)); } static void mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg, struct ib_sge *sg) { data_seg->byte_count = cpu_to_be32(sg->length); data_seg->lkey = cpu_to_be32(sg->lkey); data_seg->addr = cpu_to_be64(sg->addr); } static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, u64 offset) { u64 octo_offset = mlx5r_umr_get_xlt_octo(offset); ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff); ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16); ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; } static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev, struct mlx5r_umr_wqe *wqe, struct mlx5_ib_mr *mr, struct ib_sge *sg, unsigned int flags) { bool update_pd_access, update_translation; if (flags & MLX5_IB_UPD_XLT_ENABLE) wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask(); update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS; if (update_pd_access) { wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev); wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); } update_translation = flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR; if (update_translation) { wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(); if (!mr->ibmr.length) MLX5_SET(mkc, &wqe->mkey_seg, length64, 1); } wqe->ctrl_seg.xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); wqe->data_seg.byte_count = cpu_to_be32(sg->length); } static int _mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd) { size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt); struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; struct mlx5r_umr_wqe wqe = {}; struct ib_block_iter biter; struct mlx5_ksm *cur_ksm; struct mlx5_mtt *cur_mtt; size_t orig_sg_length; size_t final_size; void *curr_entry; struct ib_sge sg; void *entry; u64 offset = 0; int err = 0; entry = mlx5r_umr_create_xlt(dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift), ent_size, flags); if (!entry) return -ENOMEM; orig_sg_length = sg.length; mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, mr->page_shift); if (dd) { /* Use the data direct internal kernel PD */ MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn); cur_ksm = entry; } else { cur_mtt = entry; } mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); curr_entry = entry; rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) { if (curr_entry == entry + sg.length) { dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); if (err) goto err; dma_sync_single_for_cpu(ddev, sg.addr, sg.length, DMA_TO_DEVICE); offset += sg.length; mlx5r_umr_update_offset(&wqe.ctrl_seg, offset); if (dd) cur_ksm = entry; else cur_mtt = entry; } if (dd) { cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter)); cur_ksm->key = cpu_to_be32(dev->ddr.mkey); cur_ksm++; curr_entry = cur_ksm; } else { cur_mtt->ptag = cpu_to_be64(rdma_block_iter_dma_address(&biter) | MLX5_IB_MTT_PRESENT); if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) cur_mtt->ptag = 0; cur_mtt++; curr_entry = cur_mtt; } } final_size = curr_entry - entry; sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT); memset(curr_entry, 0, sg.length - final_size); mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); err: sg.length = orig_sg_length; mlx5r_umr_unmap_free_xlt(dev, entry, &sg); return err; } int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags) { /* No invalidation flow is expected */ if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP)) return -EINVAL; return _mlx5r_umr_update_mr_pas(mr, flags, true); } /* * Send the DMA list to the HW for a normal MR using UMR. * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP * flag may be used. */ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { if (WARN_ON(mr->umem->is_odp)) return -EINVAL; return _mlx5r_umr_update_mr_pas(mr, flags, false); } static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); } int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) ? sizeof(struct mlx5_klm) : sizeof(struct mlx5_mtt); const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size; struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; const int page_mask = page_align - 1; struct mlx5r_umr_wqe wqe = {}; size_t pages_mapped = 0; size_t pages_to_map = 0; size_t size_to_map = 0; size_t orig_sg_length; size_t pages_iter; struct ib_sge sg; int err = 0; void *xlt; if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && !umr_can_use_indirect_mkey(dev)) return -EPERM; if (WARN_ON(!mr->umem->is_odp)) return -EINVAL; /* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ if (idx & page_mask) { npages += idx & page_mask; idx &= ~page_mask; } pages_to_map = ALIGN(npages, page_align); xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags); if (!xlt) return -ENOMEM; pages_iter = sg.length / desc_size; orig_sg_length = sg.length; if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); size_t max_pages = ib_umem_odp_num_pages(odp) - idx; pages_to_map = min_t(size_t, pages_to_map, max_pages); } mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift); mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); for (pages_mapped = 0; pages_mapped < pages_to_map && !err; pages_mapped += pages_iter, idx += pages_iter) { npages = min_t(int, pages_iter, pages_to_map - pages_mapped); size_to_map = npages * desc_size; dma_sync_single_for_cpu(ddev, sg.addr, sg.length, DMA_TO_DEVICE); mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT); if (pages_mapped + pages_iter >= pages_to_map) mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size); err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); } sg.length = orig_sg_length; mlx5r_umr_unmap_free_xlt(dev, xlt, &sg); return err; }