Diffstat (limited to 'drivers/infiniband/hw/mlx5/qp.c')
 drivers/infiniband/hw/mlx5/qp.c | 221
 1 file changed, 179 insertions(+), 42 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e261a53f9a02..be0cd358b080 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -70,15 +70,6 @@ static const u32 mlx5_ib_opcode[] = {
         [MLX5_IB_WR_UMR]                = MLX5_OPCODE_UMR,
 };

-struct umr_wr {
-        u64                     virt_addr;
-        struct ib_pd            *pd;
-        unsigned int            page_shift;
-        unsigned int            npages;
-        u32                     length;
-        int                     access_flags;
-        u32                     mkey;
-};

 static int is_qp0(enum ib_qp_type qp_type)
 {
@@ -110,6 +101,77 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
         return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
 }

+/**
+ * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
+ *
+ * @qp: QP to copy from.
+ * @send: copy from the send queue when non-zero, use the receive queue
+ *        otherwise.
+ * @wqe_index: index to start copying from. For send work queues, the
+ *             wqe_index is in units of MLX5_SEND_WQE_BB.
+ *             For receive work queue, it is the number of work queue
+ *             element in the queue.
+ * @buffer: destination buffer.
+ * @length: maximum number of bytes to copy.
+ *
+ * Copies at least a single WQE, but may copy more data.
+ *
+ * Return: the number of bytes copied, or an error code.
+ */
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+                          void *buffer, u32 length)
+{
+        struct ib_device *ibdev = qp->ibqp.device;
+        struct mlx5_ib_dev *dev = to_mdev(ibdev);
+        struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
+        size_t offset;
+        size_t wq_end;
+        struct ib_umem *umem = qp->umem;
+        u32 first_copy_length;
+        int wqe_length;
+        int ret;
+
+        if (wq->wqe_cnt == 0) {
+                mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
+                            qp->ibqp.qp_type);
+                return -EINVAL;
+        }
+
+        offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
+        wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+
+        if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
+                return -EINVAL;
+
+        if (offset > umem->length ||
+            (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
+                return -EINVAL;
+
+        first_copy_length = min_t(u32, offset + length, wq_end) - offset;
+        ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
+        if (ret)
+                return ret;
+
+        if (send) {
+                struct mlx5_wqe_ctrl_seg *ctrl = buffer;
+                int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+
+                wqe_length = ds * MLX5_WQE_DS_UNITS;
+        } else {
+                wqe_length = 1 << wq->wqe_shift;
+        }
+
+        if (wqe_length <= first_copy_length)
+                return first_copy_length;
+
+        ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
+                                wqe_length - first_copy_length);
+        if (ret)
+                return ret;
+
+        return wqe_length;
+}
+
 static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
 {
         struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
@@ -647,7 +709,7 @@ err_unmap:
         mlx5_ib_db_unmap_user(context, &qp->db);

 err_free:
-        mlx5_vfree(*in);
+        kvfree(*in);

 err_umem:
         if (qp->umem)
@@ -761,7 +823,7 @@ err_wrid:
         kfree(qp->rq.wrid);

 err_free:
-        mlx5_vfree(*in);
+        kvfree(*in);

 err_buf:
         mlx5_buf_free(dev->mdev, &qp->buf);
@@ -814,6 +876,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
         int inlen = sizeof(*in);
         int err;

+        mlx5_ib_odp_create_qp(qp);
+
         gen = &dev->mdev->caps.gen;
         mutex_init(&qp->mutex);
         spin_lock_init(&qp->sq.lock);
@@ -971,7 +1035,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                 goto err_create;
         }

-        mlx5_vfree(in);
+        kvfree(in);
         /* Hardware wants QPN written in big-endian order (after
          * shifting) for send doorbell.  Precompute this value to save
          * a little bit when posting sends.
@@ -988,7 +1052,7 @@ err_create:
         else if (qp->create_type == MLX5_QP_KERNEL)
                 destroy_qp_kernel(dev, qp);

-        mlx5_vfree(in);
+        kvfree(in);
         return err;
 }

@@ -1011,9 +1075,14 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv
                         }
                 } else {
                         spin_lock_irq(&send_cq->lock);
+                        __acquire(&recv_cq->lock);
                 }
         } else if (recv_cq) {
                 spin_lock_irq(&recv_cq->lock);
+                __acquire(&send_cq->lock);
+        } else {
+                __acquire(&send_cq->lock);
+                __acquire(&recv_cq->lock);
         }
 }

@@ -1033,10 +1102,15 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
                                 spin_unlock_irq(&recv_cq->lock);
                         }
                 } else {
+                        __release(&recv_cq->lock);
                         spin_unlock_irq(&send_cq->lock);
                 }
         } else if (recv_cq) {
+                __release(&send_cq->lock);
                 spin_unlock_irq(&recv_cq->lock);
+        } else {
+                __release(&recv_cq->lock);
+                __release(&send_cq->lock);
         }
 }

@@ -1088,11 +1162,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
         in = kzalloc(sizeof(*in), GFP_KERNEL);
         if (!in)
                 return;
-        if (qp->state != IB_QPS_RESET)
+        if (qp->state != IB_QPS_RESET) {
+                mlx5_ib_qp_disable_pagefaults(qp);
                 if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
                                         MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
                         mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
                                      qp->mqp.qpn);
+        }

         get_cqs(qp, &send_cq, &recv_cq);

@@ -1640,6 +1716,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
         if (mlx5_st < 0)
                 goto out;

+        /* If moving to a reset or error state, we must disable page faults on
+         * this QP and flush all current page faults. Otherwise a stale page
+         * fault may attempt to work on this QP after it is reset and moved
+         * again to RTS, and may cause the driver and the device to get out of
+         * sync. */
+        if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+            (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+                mlx5_ib_qp_disable_pagefaults(qp);
+
         optpar = ib_mask_to_mlx5_opt(attr_mask);
         optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
         in->optparam = cpu_to_be32(optpar);
@@ -1649,6 +1734,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
         if (err)
                 goto out;

+        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+                mlx5_ib_qp_enable_pagefaults(qp);
+
         qp->state = new_state;

         if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -1838,37 +1926,70 @@ static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
         umr->mkey_mask = frwr_mkey_mask();
 }

+static __be64 get_umr_reg_mr_mask(void)
+{
+        u64 result;
+
+        result = MLX5_MKEY_MASK_LEN             |
+                 MLX5_MKEY_MASK_PAGE_SIZE       |
+                 MLX5_MKEY_MASK_START_ADDR      |
+                 MLX5_MKEY_MASK_PD              |
+                 MLX5_MKEY_MASK_LR              |
+                 MLX5_MKEY_MASK_LW              |
+                 MLX5_MKEY_MASK_KEY             |
+                 MLX5_MKEY_MASK_RR              |
+                 MLX5_MKEY_MASK_RW              |
+                 MLX5_MKEY_MASK_A               |
+                 MLX5_MKEY_MASK_FREE;
+
+        return cpu_to_be64(result);
+}
+
+static __be64 get_umr_unreg_mr_mask(void)
+{
+        u64 result;
+
+        result = MLX5_MKEY_MASK_FREE;
+
+        return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_mtt_mask(void)
+{
+        u64 result;
+
+        result = MLX5_MKEY_MASK_FREE;
+
+        return cpu_to_be64(result);
+}
+
 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
                                 struct ib_send_wr *wr)
 {
-        struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
-        u64 mask;
+        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

         memset(umr, 0, sizeof(*umr));

+        if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+                umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
+        else
+                umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
+
         if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
-                umr->flags = 1 << 5; /* fail if not free */
                 umr->klm_octowords = get_klm_octo(umrwr->npages);
-                mask =  MLX5_MKEY_MASK_LEN              |
-                        MLX5_MKEY_MASK_PAGE_SIZE        |
-                        MLX5_MKEY_MASK_START_ADDR       |
-                        MLX5_MKEY_MASK_PD               |
-                        MLX5_MKEY_MASK_LR               |
-                        MLX5_MKEY_MASK_LW               |
-                        MLX5_MKEY_MASK_KEY              |
-                        MLX5_MKEY_MASK_RR               |
-                        MLX5_MKEY_MASK_RW               |
-                        MLX5_MKEY_MASK_A                |
-                        MLX5_MKEY_MASK_FREE;
-                umr->mkey_mask = cpu_to_be64(mask);
+                if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
+                        umr->mkey_mask = get_umr_update_mtt_mask();
+                        umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
+                        umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+                } else {
+                        umr->mkey_mask = get_umr_reg_mr_mask();
+                }
         } else {
-                umr->flags = 2 << 5; /* fail if free */
-                mask = MLX5_MKEY_MASK_FREE;
-                umr->mkey_mask = cpu_to_be64(mask);
+                umr->mkey_mask = get_umr_unreg_mr_mask();
         }

         if (!wr->num_sge)
-                umr->flags |= (1 << 7); /* inline */
+                umr->flags |= MLX5_UMR_INLINE;
 }

 static u8 get_umr_flags(int acc)
@@ -1885,7 +2006,7 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
 {
         memset(seg, 0, sizeof(*seg));
         if (li) {
-                seg->status = 1 << 6;
+                seg->status = MLX5_MKEY_STATUS_FREE;
                 return;
         }

@@ -1902,19 +2023,23 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,

 static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
 {
+        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
         memset(seg, 0, sizeof(*seg));
         if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
-                seg->status = 1 << 6;
+                seg->status = MLX5_MKEY_STATUS_FREE;
                 return;
         }

-        seg->flags = convert_access(wr->wr.fast_reg.access_flags);
-        seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
-        seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
-        seg->len = cpu_to_be64(wr->wr.fast_reg.length);
-        seg->log2_page_size = wr->wr.fast_reg.page_shift;
+        seg->flags = convert_access(umrwr->access_flags);
+        if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
+                seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+                seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
+        }
+        seg->len = cpu_to_be64(umrwr->length);
+        seg->log2_page_size = umrwr->page_shift;
         seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
-                                       mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+                                       mlx5_mkey_variant(umrwr->mkey));
 }

 static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -2411,7 +2536,7 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)

 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
                      struct mlx5_wqe_ctrl_seg **ctrl,
-                     struct ib_send_wr *wr, int *idx,
+                     struct ib_send_wr *wr, unsigned *idx,
                      int *size, int nreq)
 {
         int err = 0;
@@ -2737,6 +2862,8 @@ out:
                 if (bf->need_lock)
                         spin_lock(&bf->lock);
+                else
+                        __acquire(&bf->lock);

                 /* TBD enable WC */
                 if (0 && nreq == 1 && bf->uuarn && inl && size > 1 &&
                     size <= bf->buf_size / 16) {
@@ -2753,6 +2880,8 @@ out:
                 bf->offset ^= bf->buf_size;
                 if (bf->need_lock)
                         spin_unlock(&bf->lock);
+                else
+                        __release(&bf->lock);
         }

         spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -2913,6 +3042,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
         int mlx5_state;
         int err = 0;

+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+        /*
+         * Wait for any outstanding page faults, in case the user frees memory
+         * based upon this query's result.
+         */
+        flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
         mutex_lock(&qp->mutex);
         outb = kzalloc(sizeof(*outb), GFP_KERNEL);
         if (!outb) {
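
Note: the sketch below is not part of the patch. It is a minimal, illustrative caller of the new mlx5_ib_read_user_wqe() helper, roughly how an on-demand-paging page-fault handler might copy the faulting send WQE into kernel memory before parsing it. The example_read_send_wqe() name, the stack buffer size, and the error handling are assumptions made for the example.

/* Illustrative only -- not from the patch. */
static int example_read_send_wqe(struct mlx5_ib_qp *qp, u16 wqe_index)
{
        /* Assumed scratch size: a few 64-byte send WQE basic blocks. */
        char buf[4 * MLX5_SEND_WQE_BB];
        int copied;

        /* send == 1 selects the send queue; for the send queue, wqe_index
         * is expressed in units of MLX5_SEND_WQE_BB. */
        copied = mlx5_ib_read_user_wqe(qp, 1, wqe_index, buf, sizeof(buf));
        if (copied < 0)
                return copied;  /* -EINVAL or an ib_umem_copy_from() error */

        /* 'copied' bytes, starting with the mlx5_wqe_ctrl_seg, are now in
         * kernel memory and can be parsed without touching user pages. */
        return 0;
}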