diff options
author | Maor Gottlieb <maorg@mellanox.com> | 2016-06-17 15:01:38 +0300 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-06-23 18:02:45 +0300 |
commit | 89ea94a7b6c40eb423c144aef1caceebaff79c8d (patch) | |
tree | 2831167a7aecf1b24275d3aca92bc1d3fadcc107 /drivers/infiniband/hw/mlx5/cq.c | |
parent | 7c2344c3bbf97eb5dfa732d5098285d15d3bf9bf (diff) | |
download | linux-89ea94a7b6c40eb423c144aef1caceebaff79c8d.tar.xz |
IB/mlx5: Reset flow support for IB kernel ULPs
The driver exposes interfaces that directly relate to HW state.
Upon fatal error, consumers of these interfaces (ULPs) that rely
on completion of all their posted work-request could hang, thereby
introducing dependencies in shutdown order. To prevent this from
happening, we manage the relevant resources (CQs, QPs) that are used
by the device. Upon a fatal error, we now generate simulated
completions for outstanding WQEs that were not completed at the
time the HW was reset.
It includes invoking the completion event handler for all involved
CQs so that the ULPs will poll those CQs. When polled we return
simulated CQEs with IB_WC_WR_FLUSH_ERR return code enabling ULPs
to clean up their resources and not wait forever for completions
upon receiving remove_one.
The above change requires an extra check in the data path to make
sure that when device is in error state, the simulated CQEs will
be returned and no further WQEs will be posted.
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/mlx5/cq.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 87 |
1 files changed, 86 insertions, 1 deletions
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 9c0e67bd2ba7..308a358e5b46 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, item->key = be32_to_cpu(cqe->mkey); } +static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_wq *wq; + unsigned int cur; + unsigned int idx; + int np; + int i; + + wq = &qp->sq; + cur = wq->head - wq->tail; + np = *npolled; + + if (cur == 0) + return; + + for (i = 0; i < cur && np < num_entries; i++) { + idx = wq->last_poll & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + np++; + wc->qp = &qp->ibqp; + wc++; + wq->last_poll = wq->w_list[idx].next; + } + *npolled = np; +} + +static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_wq *wq; + unsigned int cur; + int np; + int i; + + wq = &qp->rq; + cur = wq->head - wq->tail; + np = *npolled; + + if (cur == 0) + return; + + for (i = 0; i < cur && np < num_entries; i++) { + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + np++; + wc->qp = &qp->ibqp; + wc++; + } + *npolled = np; +} + +static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx5_ib_qp *qp; + + *npolled = 0; + /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */ + list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { + sw_send_comp(qp, num_entries, wc + *npolled, npolled); + if (*npolled >= num_entries) + return; + } + + list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { + sw_recv_comp(qp, num_entries, wc + *npolled, npolled); + if (*npolled >= num_entries) + return; + } +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct mlx5_ib_cq *cq = to_mcq(ibcq); struct mlx5_ib_qp *cur_qp = NULL; + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_core_dev *mdev = dev->mdev; unsigned long flags; int soft_polled = 0; int npolled; int err = 0; spin_lock_irqsave(&cq->lock, flags); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + goto out; + } if (unlikely(!list_empty(&cq->wc_list))) soft_polled = poll_soft_wc(cq, num_entries, wc); @@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) if (npolled) mlx5_cq_set_ci(&cq->mcq); - +out: spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) @@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, cq->resize_buf = NULL; cq->resize_umem = NULL; cq->create_flags = attr->flags; + INIT_LIST_HEAD(&cq->list_send_qp); + INIT_LIST_HEAD(&cq->list_recv_qp); if (context) { err = create_cq_user(dev, udata, context, cq, entries, |