Diffstat (limited to 'drivers/infiniband/hw/mlx5/qp.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 294
1 file changed, 211 insertions(+), 83 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a4f1f638509f..6cba2a02d11b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
 }
 
 static int alloc_bfreg(struct mlx5_ib_dev *dev,
-		       struct mlx5_bfreg_info *bfregi,
-		       enum mlx5_ib_latency_class lat)
+		       struct mlx5_bfreg_info *bfregi)
 {
-	int bfregn = -EINVAL;
+	int bfregn = -ENOMEM;
 
 	mutex_lock(&bfregi->lock);
-	switch (lat) {
-	case MLX5_IB_LATENCY_CLASS_LOW:
+	if (bfregi->ver >= 2) {
+		bfregn = alloc_high_class_bfreg(dev, bfregi);
+		if (bfregn < 0)
+			bfregn = alloc_med_class_bfreg(dev, bfregi);
+	}
+
+	if (bfregn < 0) {
 		BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
 		bfregn = 0;
 		bfregi->count[bfregn]++;
-		break;
-
-	case MLX5_IB_LATENCY_CLASS_MEDIUM:
-		if (bfregi->ver < 2)
-			bfregn = -ENOMEM;
-		else
-			bfregn = alloc_med_class_bfreg(dev, bfregi);
-		break;
-
-	case MLX5_IB_LATENCY_CLASS_HIGH:
-		if (bfregi->ver < 2)
-			bfregn = -ENOMEM;
-		else
-			bfregn = alloc_high_class_bfreg(dev, bfregi);
-		break;
 	}
 	mutex_unlock(&bfregi->lock);
 
@@ -641,13 +630,13 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
 			       struct mlx5_ib_cq *recv_cq);
 
-static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
-			       struct mlx5_bfreg_info *bfregi, int bfregn,
-			       bool dyn_bfreg)
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+			struct mlx5_bfreg_info *bfregi, u32 bfregn,
+			bool dyn_bfreg)
 {
-	int bfregs_per_sys_page;
-	int index_of_sys_page;
-	int offset;
+	unsigned int bfregs_per_sys_page;
+	u32 index_of_sys_page;
+	u32 offset;
 
 	bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
 				MLX5_NON_FP_BFREGS_PER_UAR;
@@ -655,6 +644,10 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 
 	if (dyn_bfreg) {
 		index_of_sys_page += bfregi->num_static_sys_pages;
+
+		if (index_of_sys_page >= bfregi->num_sys_pages)
+			return -EINVAL;
+
 		if (bfregn > bfregi->num_dyn_bfregs ||
 		    bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
 			mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
@@ -819,21 +812,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		bfregn = MLX5_CROSS_CHANNEL_BFREG;
 	}
 	else {
-		bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH);
-		if (bfregn < 0) {
-			mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n");
-			mlx5_ib_dbg(dev, "reverting to medium latency\n");
-			bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM);
-			if (bfregn < 0) {
-				mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n");
-				mlx5_ib_dbg(dev, "reverting to high latency\n");
-				bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW);
-				if (bfregn < 0) {
-					mlx5_ib_warn(dev, "bfreg allocation failed\n");
-					return bfregn;
-				}
-			}
-		}
+		bfregn = alloc_bfreg(dev, &context->bfregi);
+		if (bfregn < 0)
+			return bfregn;
 	}
 
 	mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
@@ -1626,7 +1607,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	struct mlx5_ib_resources *devr = &dev->devr;
 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct mlx5_ib_create_qp_resp resp;
+	struct mlx5_ib_create_qp_resp resp = {};
 	struct mlx5_ib_cq *send_cq;
 	struct mlx5_ib_cq *recv_cq;
 	unsigned long flags;
@@ -2555,18 +2536,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
 		if (!(ah_flags & IB_AH_GRH))
 			return -EINVAL;
-		err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index,
-					     &gid_type);
-		if (err)
-			return err;
+
 		memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
 		if (qp->ibqp.qp_type == IB_QPT_RC ||
 		    qp->ibqp.qp_type == IB_QPT_UC ||
 		    qp->ibqp.qp_type == IB_QPT_XRC_INI ||
 		    qp->ibqp.qp_type == IB_QPT_XRC_TGT)
-			path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
-								  grh->sgid_index);
+			path->udp_sport =
+				mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
 		path->dci_cfi_prio_sl = (sl & 0x7) << 4;
+		gid_type = ah->grh.sgid_attr->gid_type;
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
 			path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
 	} else {
@@ -3529,7 +3508,7 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
 }
 
 static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
-			 struct ib_send_wr *wr, void *qend,
+			 const struct ib_send_wr *wr, void *qend,
 			 struct mlx5_ib_qp *qp, int *size)
 {
 	void *seg = eseg;
@@ -3582,7 +3561,7 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
 }
 
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
-			     struct ib_send_wr *wr)
+			     const struct ib_send_wr *wr)
 {
 	memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
 	dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
@@ -3730,9 +3709,9 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
 
 static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
 			       struct mlx5_wqe_umr_ctrl_seg *umr,
-			       struct ib_send_wr *wr, int atomic)
+			       const struct ib_send_wr *wr, int atomic)
 {
-	struct mlx5_umr_wr *umrwr = umr_wr(wr);
+	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
 	memset(umr, 0, sizeof(*umr));
 
@@ -3803,9 +3782,10 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
 	seg->status = MLX5_MKEY_STATUS_FREE;
 }
 
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
+static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
+				 const struct ib_send_wr *wr)
 {
-	struct mlx5_umr_wr *umrwr = umr_wr(wr);
+	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
 	memset(seg, 0, sizeof(*seg));
 	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
@@ -3854,7 +3834,7 @@ static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
 	seg += mr_list_size;
 }
 
-static __be32 send_ieth(struct ib_send_wr *wr)
+static __be32 send_ieth(const struct ib_send_wr *wr)
 {
 	switch (wr->opcode) {
 	case IB_WR_SEND_WITH_IMM:
@@ -3886,7 +3866,7 @@ static u8 wq_sig(void *wqe)
 	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
 }
 
-static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
 			    void *wqe, int *sz)
 {
 	struct mlx5_wqe_inline_seg *seg;
@@ -4032,7 +4012,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 	return 0;
 }
 
-static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
+static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
 				struct mlx5_ib_qp *qp, void **seg, int *size)
 {
 	struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
@@ -4134,7 +4114,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
 }
 
 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
-				 struct ib_sig_handover_wr *wr, u32 size,
+				 const struct ib_sig_handover_wr *wr, u32 size,
 				 u32 length, u32 pdn)
 {
 	struct ib_mr *sig_mr = wr->sig_mr;
@@ -4165,10 +4145,10 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 }
 
-static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
-			  void **seg, int *size)
+static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
+			  struct mlx5_ib_qp *qp, void **seg, int *size)
 {
-	struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
+	const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
 	struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
 	u32 pdn = get_pd(qp)->pdn;
 	u32 xlt_size;
@@ -4243,7 +4223,7 @@ static int set_psv_wr(struct ib_sig_domain *domain,
 }
 
 static int set_reg_wr(struct mlx5_ib_qp *qp,
-		      struct ib_reg_wr *wr,
+		      const struct ib_reg_wr *wr,
 		      void **seg, int *size)
 {
 	struct mlx5_ib_mr *mr = to_mmr(wr->mr);
@@ -4314,10 +4294,10 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 	}
 }
 
-static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 		     struct mlx5_wqe_ctrl_seg **ctrl,
-		     struct ib_send_wr *wr, unsigned *idx,
-		     int *size, int nreq)
+		     const struct ib_send_wr *wr, unsigned *idx,
+		     int *size, int nreq, bool send_signaled, bool solicited)
 {
 	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
 		return -ENOMEM;
@@ -4328,10 +4308,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 	*(uint32_t *)(*seg + 8) = 0;
 	(*ctrl)->imm = send_ieth(wr);
 	(*ctrl)->fm_ce_se = qp->sq_signal_bits |
-		(wr->send_flags & IB_SEND_SIGNALED ?
-		 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
-		(wr->send_flags & IB_SEND_SOLICITED ?
-		 MLX5_WQE_CTRL_SOLICITED : 0);
+		(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+		(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
 
 	*seg += sizeof(**ctrl);
 	*size = sizeof(**ctrl) / 16;
@@ -4339,6 +4317,16 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 	return 0;
 }
 
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+		     struct mlx5_wqe_ctrl_seg **ctrl,
+		     const struct ib_send_wr *wr, unsigned *idx,
+		     int *size, int nreq)
+{
+	return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
+			   wr->send_flags & IB_SEND_SIGNALED,
+			   wr->send_flags & IB_SEND_SOLICITED);
+}
+
 static void finish_wqe(struct mlx5_ib_qp *qp,
 		       struct mlx5_wqe_ctrl_seg *ctrl,
 		       u8 size, unsigned idx, u64 wr_id,
@@ -4360,9 +4348,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 	qp->sq.w_list[idx].next = qp->sq.cur_post;
 }
 
-
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			      const struct ib_send_wr **bad_wr, bool drain)
 {
 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;	/* compiler warning */
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4393,7 +4380,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -4498,10 +4485,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			 * SET_PSV WQEs are not signaled and solicited
			 * on error
			 */
-			wr->send_flags &= ~IB_SEND_SIGNALED;
-			wr->send_flags |= IB_SEND_SOLICITED;
-			err = begin_wqe(qp, &seg, &ctrl, wr,
-					&idx, &size, nreq);
+			err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
+					  &size, nreq, false, true);
 			if (err) {
 				mlx5_ib_warn(dev, "\n");
 				err = -ENOMEM;
@@ -4520,8 +4505,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 			finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
 				   fence, MLX5_OPCODE_SET_PSV);
-			err = begin_wqe(qp, &seg, &ctrl, wr,
-					&idx, &size, nreq);
+			err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
+					  &size, nreq, false, true);
 			if (err) {
 				mlx5_ib_warn(dev, "\n");
 				err = -ENOMEM;
@@ -4690,13 +4675,19 @@ out:
 	return err;
 }
 
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
+{
+	return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
 {
 	sig->signature = calc_sig(sig, size);
 }
 
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr, bool drain)
 {
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	struct mlx5_wqe_data_seg *scat;
@@ -4714,7 +4705,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 	spin_lock_irqsave(&qp->rq.lock, flags);
 
-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -4776,6 +4767,12 @@ out:
 	return err;
 }
 
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
+{
+	return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
 static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
 {
 	switch (mlx5_state) {
@@ -5365,7 +5362,9 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev,
 
 	rwq->wqe_count = ucmd->rq_wqe_count;
 	rwq->wqe_shift = ucmd->rq_wqe_shift;
-	rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
+	if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size))
+		return -EINVAL;
+
 	rwq->log_rq_stride = rwq->wqe_shift;
 	rwq->log_rq_size = ilog2(rwq->wqe_count);
 	return 0;
@@ -5697,3 +5696,132 @@ out:
 	kvfree(in);
 	return err;
 }
+
+struct mlx5_ib_drain_cqe {
+	struct ib_cqe cqe;
+	struct completion done;
+};
+
+static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+						     struct mlx5_ib_drain_cqe,
+						     cqe);
+
+	complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR was completed */
+static void handle_drain_completion(struct ib_cq *cq,
+				    struct mlx5_ib_drain_cqe *sdrain,
+				    struct mlx5_ib_dev *dev)
+{
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	if (cq->poll_ctx == IB_POLL_DIRECT) {
+		while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+			ib_process_cq_direct(cq, -1);
+		return;
+	}
+
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		struct mlx5_ib_cq *mcq = to_mcq(cq);
+		bool triggered = false;
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+		/* Make sure that the CQ handler won't run if wasn't run yet */
+		if (!mcq->mcq.reset_notify_added)
+			mcq->mcq.reset_notify_added = 1;
+		else
+			triggered = true;
+		spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+		if (triggered) {
+			/* Wait for any scheduled/running task to be ended */
+			switch (cq->poll_ctx) {
+			case IB_POLL_SOFTIRQ:
+				irq_poll_disable(&cq->iop);
+				irq_poll_enable(&cq->iop);
+				break;
+			case IB_POLL_WORKQUEUE:
+				cancel_work_sync(&cq->work);
+				break;
+			default:
+				WARN_ON_ONCE(1);
+			}
+		}
+
+		/* Run the CQ handler - this makes sure that the drain WR will
+		 * be processed if wasn't processed yet.
+		 */
+		mcq->mcq.comp(&mcq->mcq);
+	}
+
+	wait_for_completion(&sdrain->done);
+}
+
+void mlx5_ib_drain_sq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->send_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx5_ib_drain_cqe sdrain;
+	const struct ib_send_wr *bad_swr;
+	struct ib_rdma_wr swr = {
+		.wr = {
+			.next = NULL,
+			{ .wr_cqe	= &sdrain.cqe, },
+			.opcode	= IB_WR_RDMA_WRITE,
+		},
+	};
+	int ret;
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	sdrain.cqe.done = mlx5_ib_drain_qp_done;
+	init_completion(&sdrain.done);
+
+	ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx5_ib_drain_rq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->recv_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx5_ib_drain_cqe rdrain;
+	struct ib_recv_wr rwr = {};
+	const struct ib_recv_wr *bad_rwr;
+	int ret;
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	rwr.wr_cqe = &rdrain.cqe;
+	rdrain.cqe.done = mlx5_ib_drain_qp_done;
+	init_completion(&rdrain.done);
+
+	ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &rdrain, dev);
+}
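
For context, a minimal sketch (not part of this patch) of how a kernel ULP would typically end up in the new drain entry points. It assumes the driver wires mlx5_ib_drain_sq()/mlx5_ib_drain_rq() into its ib_device operations elsewhere, so the generic RDMA core drain helpers dispatch to them; the example_teardown_qp() helper below is hypothetical and error handling is omitted.

#include <rdma/ib_verbs.h>

/* Illustrative only: drain a QP before tearing it down. */
static void example_teardown_qp(struct ib_qp *qp)
{
	/*
	 * ib_drain_qp() drains both the send and receive queues; with
	 * driver-specific drain ops registered it calls into the
	 * mlx5_ib_drain_sq()/mlx5_ib_drain_rq() functions added above.
	 */
	ib_drain_qp(qp);

	/* All posted WRs have completed; resources can be released safely. */
	ib_destroy_qp(qp);
}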