summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c2
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c170
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h10
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c23
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c751
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h127
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h58
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h9
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c22
-rw-r--r--drivers/infiniband/hw/cxgb4/restrack.c24
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h15
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c2
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h2
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c6
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c43
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c27
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c19
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c16
-rw-r--r--drivers/infiniband/hw/hfi1/init.c37
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h2
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib.h6
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_tx.c104
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c9
-rw-r--r--drivers/infiniband/hw/hfi1/netdev_rx.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c22
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c12
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c10
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c9
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h14
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c4
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c25
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c9
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c9
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c8
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h38
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c11
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c304
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h19
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c217
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c14
-rw-r--r--drivers/infiniband/hw/i40iw/Makefile1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c22
-rw-r--r--drivers/infiniband/hw/mlx4/main.c37
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c3
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c6
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile6
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c12
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h1
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c709
-rw-r--r--drivers/infiniband/hw/mlx5/counters.h17
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c6
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c104
-rw-r--r--drivers/infiniband/hw/mlx5/devx.h45
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c765
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c2516
-rw-r--r--drivers/infiniband/hw/mlx5/fs.h29
-rw-r--r--drivers/infiniband/hw/mlx5/main.c3256
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h109
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c2
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c55
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c155
-rw-r--r--drivers/infiniband/hw/mlx5/qp.h1
-rw-r--r--drivers/infiniband/hw/mlx5/qpc.c8
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.c121
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.h13
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c4
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c4
-rw-r--r--drivers/infiniband/hw/mlx5/std_types.c45
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c70
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c10
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h2
-rw-r--r--drivers/infiniband/hw/qedr/main.c23
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h5
-rw-r--r--drivers/infiniband/hw/qedr/qedr_iw_cm.c13
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c142
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.c4
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h2
90 files changed, 5536 insertions, 5032 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 3421a0b15983..5f5408cdf008 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -132,7 +132,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
stats->value[BNXT_RE_RX_DROPS] =
- le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
+ le64_to_cpu(bnxt_re_stats->rx_error_pkts);
stats->value[BNXT_RE_RX_DISCARDS] =
le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
stats->value[BNXT_RE_RX_PKTS] =
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 8b6ad5cddfce..3f18efc0c297 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -842,16 +842,79 @@ static u8 __from_ib_qp_type(enum ib_qp_type type)
}
}
+static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp,
+ int rsge, int max)
+{
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ rsge = max;
+ return bnxt_re_get_rwqe_size(rsge);
+}
+
+static u16 bnxt_re_get_wqe_size(int ilsize, int nsge)
+{
+ u16 wqe_size, calc_ils;
+
+ wqe_size = bnxt_re_get_swqe_size(nsge);
+ if (ilsize) {
+ calc_ils = sizeof(struct sq_send_hdr) + ilsize;
+ wqe_size = max_t(u16, calc_ils, wqe_size);
+ wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr));
+ }
+ return wqe_size;
+}
+
+static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *sq;
+ int align, ilsize;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ sq = &qplqp->sq;
+ dev_attr = &rdev->dev_attr;
+
+ align = sizeof(struct sq_send_hdr);
+ ilsize = ALIGN(init_attr->cap.max_inline_data, align);
+
+ sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge);
+ if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges))
+ return -EINVAL;
+ /* For gen p4 and gen p5 backward compatibility mode
+ * wqe size is fixed to 128 bytes
+ */
+ if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) &&
+ qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges);
+
+ if (init_attr->cap.max_inline_data) {
+ qplqp->max_inline_data = sq->wqe_size -
+ sizeof(struct sq_send_hdr);
+ init_attr->cap.max_inline_data = qplqp->max_inline_data;
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ sq->max_sge = qplqp->max_inline_data /
+ sizeof(struct sq_sge);
+ }
+
+ return 0;
+}
+
static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
struct bnxt_re_qp *qp, struct ib_udata *udata)
{
+ struct bnxt_qplib_qp *qplib_qp;
+ struct bnxt_re_ucontext *cntx;
struct bnxt_re_qp_req ureq;
- struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
- struct ib_umem *umem;
int bytes = 0, psn_sz;
- struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
- udata, struct bnxt_re_ucontext, ib_uctx);
+ struct ib_umem *umem;
+ int psn_nume;
+ qplib_qp = &qp->qplib_qp;
+ cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
+ ib_uctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
@@ -859,10 +922,15 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
/* Consider mapping PSN search memory only for RC QPs. */
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
- sizeof(struct sq_psn_search_ext) :
- sizeof(struct sq_psn_search);
- bytes += (qplib_qp->sq.max_wqe * psn_sz);
+ sizeof(struct sq_psn_search_ext) :
+ sizeof(struct sq_psn_search);
+ psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ qplib_qp->sq.max_wqe :
+ ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
+ sizeof(struct bnxt_qplib_sge));
+ bytes += (psn_nume * psn_sz);
}
+
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes,
IB_ACCESS_LOCAL_WRITE);
@@ -975,7 +1043,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.sig_type = true;
/* Shadow QP SQ depth should be same as QP1 RQ depth */
- qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size();
+ qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6);
qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.sq.max_sge = 2;
/* Q full delta can be 1 since it is internal QP */
@@ -986,7 +1054,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.scq = qp1_qp->scq;
qp->qplib_qp.rcq = qp1_qp->rcq;
- qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size();
+ qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6);
qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
/* Q full delta can be 1 since it is internal QP */
@@ -1041,19 +1109,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
qplqp->srq = &srq->qplib_srq;
rq->max_wqe = 0;
} else {
- rq->wqe_size = bnxt_re_get_rwqe_size();
+ rq->max_sge = init_attr->cap.max_recv_sge;
+ if (rq->max_sge > dev_attr->max_qp_sges)
+ rq->max_sge = dev_attr->max_qp_sges;
+ init_attr->cap.max_recv_sge = rq->max_sge;
+ rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge,
+ dev_attr->max_qp_sges);
/* Allocate 1 more than what's provided so posting max doesn't
* mean empty.
*/
entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1);
rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
- rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr;
- rq->max_sge = init_attr->cap.max_recv_sge;
- if (rq->max_sge > dev_attr->max_qp_sges)
- rq->max_sge = dev_attr->max_qp_sges;
+ rq->q_full_delta = 0;
+ rq->sg_info.pgsize = PAGE_SIZE;
+ rq->sg_info.pgshft = PAGE_SHIFT;
}
- rq->sg_info.pgsize = PAGE_SIZE;
- rq->sg_info.pgshft = PAGE_SHIFT;
return 0;
}
@@ -1068,41 +1138,48 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
qplqp = &qp->qplib_qp;
dev_attr = &rdev->dev_attr;
- qplqp->rq.max_sge = dev_attr->max_qp_sges;
- if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+ if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
qplqp->rq.max_sge = dev_attr->max_qp_sges;
- qplqp->rq.max_sge = 6;
+ if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+ qplqp->rq.max_sge = dev_attr->max_qp_sges;
+ qplqp->rq.max_sge = 6;
+ }
}
-static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
struct bnxt_re_dev *rdev;
struct bnxt_qplib_q *sq;
int entries;
+ int diff;
+ int rc;
rdev = qp->rdev;
qplqp = &qp->qplib_qp;
sq = &qplqp->sq;
dev_attr = &rdev->dev_attr;
- sq->wqe_size = bnxt_re_get_swqe_size();
sq->max_sge = init_attr->cap.max_send_sge;
- if (sq->max_sge > dev_attr->max_qp_sges)
+ if (sq->max_sge > dev_attr->max_qp_sges) {
sq->max_sge = dev_attr->max_qp_sges;
- /*
- * Change the SQ depth if user has requested minimum using
- * configfs. Only supported for kernel consumers
- */
+ init_attr->cap.max_send_sge = sq->max_sge;
+ }
+
+ rc = bnxt_re_setup_swqe_size(qp, init_attr);
+ if (rc)
+ return rc;
+
entries = init_attr->cap.max_send_wr;
/* Allocate 128 + 1 more than what's provided */
- entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1);
- sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes +
- BNXT_QPLIB_RESERVED_QP_WRS + 1);
- sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+ diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
+ 0 : BNXT_QPLIB_RESERVED_QP_WRS;
+ entries = roundup_pow_of_two(entries + diff + 1);
+ sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
+ sq->q_full_delta = diff + 1;
/*
* Reserving one slot for Phantom WQE. Application can
* post one extra entry in this case. But allowing this to avoid
@@ -1111,6 +1188,8 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
qplqp->sq.q_full_delta -= 1;
qplqp->sq.sg_info.pgsize = PAGE_SIZE;
qplqp->sq.sg_info.pgshft = PAGE_SHIFT;
+
+ return 0;
}
static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
@@ -1125,13 +1204,16 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
qplqp = &qp->qplib_qp;
dev_attr = &rdev->dev_attr;
- entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1);
- qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
- qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
- init_attr->cap.max_send_wr;
- qplqp->sq.max_sge++; /* Need one extra sge to put UD header */
- if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
- qplqp->sq.max_sge = dev_attr->max_qp_sges;
+ if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1);
+ qplqp->sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
+ init_attr->cap.max_send_wr;
+ qplqp->sq.max_sge++; /* Need one extra sge to put UD header */
+ if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
+ qplqp->sq.max_sge = dev_attr->max_qp_sges;
+ }
}
static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev,
@@ -1183,6 +1265,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
goto out;
}
qplqp->type = (u8)qptype;
+ qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode;
if (init_attr->qp_type == IB_QPT_RC) {
qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom;
@@ -1226,7 +1309,9 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
bnxt_re_adjust_gsi_rq_attr(qp);
/* Setup SQ */
- bnxt_re_init_sq_attr(qp, init_attr, udata);
+ rc = bnxt_re_init_sq_attr(qp, init_attr, udata);
+ if (rc)
+ goto out;
if (init_attr->qp_type == IB_QPT_GSI)
bnxt_re_adjust_gsi_sq_attr(qp, init_attr);
@@ -1574,8 +1659,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
entries = dev_attr->max_srq_wqes + 1;
srq->qplib_srq.max_wqe = entries;
- srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size();
srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
+ srq->qplib_srq.wqe_size =
+ bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge);
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
srq->srq_limit = srq_init_attr->attr.srq_limit;
srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
@@ -3569,7 +3655,7 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
}
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index e5fbbeba6d28..1daeb30e06fd 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -136,14 +136,14 @@ struct bnxt_re_ucontext {
spinlock_t sh_lock; /* protect shpg */
};
-static inline u16 bnxt_re_get_swqe_size(void)
+static inline u16 bnxt_re_get_swqe_size(int nsge)
{
- return sizeof(struct sq_send);
+ return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
}
-static inline u16 bnxt_re_get_rwqe_size(void)
+static inline u16 bnxt_re_get_rwqe_size(int nsge)
{
- return sizeof(struct rq_wqe);
+ return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge));
}
int bnxt_re_query_device(struct ib_device *ibdev,
@@ -201,7 +201,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b12fbc857f94..dad0df8a2467 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -82,6 +82,15 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev);
static void bnxt_re_dealloc_driver(struct ib_device *ib_dev);
static void bnxt_re_stop_irq(void *handle);
+static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+
+ cctx = rdev->chip_ctx;
+ cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
+ mode : BNXT_QPLIB_WQE_MODE_STATIC;
+}
+
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *chip_ctx;
@@ -97,7 +106,7 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
kfree(chip_ctx);
}
-static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
+static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
{
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
@@ -117,6 +126,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
rdev->qplib_res.cctx = rdev->chip_ctx;
rdev->rcfw.res = &rdev->qplib_res;
+ bnxt_re_set_drv_mode(rdev, wqe_mode);
return 0;
}
@@ -1386,7 +1396,7 @@ static void bnxt_re_worker(struct work_struct *work)
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
}
-static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
+static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
{
struct bnxt_qplib_creq_ctx *creq;
struct bnxt_re_ring_attr rattr;
@@ -1406,7 +1416,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
- rc = bnxt_re_setup_chip_ctx(rdev);
+ rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
return -EINVAL;
@@ -1585,7 +1595,7 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev)
}
static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
- struct net_device *netdev)
+ struct net_device *netdev, u8 wqe_mode)
{
int rc;
@@ -1599,7 +1609,7 @@ static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
}
pci_dev_get((*rdev)->en_dev->pdev);
- rc = bnxt_re_dev_init(*rdev);
+ rc = bnxt_re_dev_init(*rdev, wqe_mode);
if (rc) {
pci_dev_put((*rdev)->en_dev->pdev);
bnxt_re_dev_unreg(*rdev);
@@ -1711,7 +1721,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
case NETDEV_REGISTER:
if (rdev)
break;
- rc = bnxt_re_add_device(&rdev, real_dev);
+ rc = bnxt_re_add_device(&rdev, real_dev,
+ BNXT_QPLIB_WQE_MODE_STATIC);
if (!rc)
sch_work = true;
release = false;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index c5e29577cd43..117b42349a28 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -178,11 +178,11 @@ static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
if (qp->rq_hdr_buf)
dma_free_coherent(&res->pdev->dev,
- rq->hwq.max_elements * qp->rq_hdr_buf_size,
+ rq->max_wqe * qp->rq_hdr_buf_size,
qp->rq_hdr_buf, qp->rq_hdr_buf_map);
if (qp->sq_hdr_buf)
dma_free_coherent(&res->pdev->dev,
- sq->hwq.max_elements * qp->sq_hdr_buf_size,
+ sq->max_wqe * qp->sq_hdr_buf_size,
qp->sq_hdr_buf, qp->sq_hdr_buf_map);
qp->rq_hdr_buf = NULL;
qp->sq_hdr_buf = NULL;
@@ -199,10 +199,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
struct bnxt_qplib_q *sq = &qp->sq;
int rc = 0;
- if (qp->sq_hdr_buf_size && sq->hwq.max_elements) {
+ if (qp->sq_hdr_buf_size && sq->max_wqe) {
qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
- sq->hwq.max_elements *
- qp->sq_hdr_buf_size,
+ sq->max_wqe * qp->sq_hdr_buf_size,
&qp->sq_hdr_buf_map, GFP_KERNEL);
if (!qp->sq_hdr_buf) {
rc = -ENOMEM;
@@ -212,9 +211,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
}
}
- if (qp->rq_hdr_buf_size && rq->hwq.max_elements) {
+ if (qp->rq_hdr_buf_size && rq->max_wqe) {
qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
- rq->hwq.max_elements *
+ rq->max_wqe *
qp->rq_hdr_buf_size,
&qp->rq_hdr_buf_map,
GFP_KERNEL);
@@ -661,6 +660,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
srq->dbinfo.hwq = &srq->hwq;
srq->dbinfo.xid = srq->id;
srq->dbinfo.db = srq->dpi->dbr;
+ srq->dbinfo.max_slot = 1;
srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem;
if (srq->threshold)
bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA);
@@ -784,6 +784,28 @@ done:
}
/* QP */
+
+static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que)
+{
+ int rc = 0;
+ int indx;
+
+ que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL);
+ if (!que->swq) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ que->swq_start = 0;
+ que->swq_last = que->max_wqe - 1;
+ for (indx = 0; indx < que->max_wqe; indx++)
+ que->swq[indx].next_idx = indx + 1;
+ que->swq[que->swq_last].next_idx = 0; /* Make it circular */
+ que->swq_last = 0;
+out:
+ return rc;
+}
+
int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_hwq_attr hwq_attr = {};
@@ -808,71 +830,63 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* SQ */
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
- hwq_attr.depth = sq->max_wqe;
- hwq_attr.stride = sq->wqe_size;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
goto exit;
- sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
- if (!sq->swq) {
- rc = -ENOMEM;
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
goto fail_sq;
- }
+
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT);
pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK);
req.sq_pg_size_sq_lvl = pg_sz_lvl;
+ req.sq_fwo_sq_sge =
+ cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_SQ_SGE_SFT);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
- if (qp->scq)
- req.scq_cid = cpu_to_le32(qp->scq->id);
/* RQ */
if (rq->max_wqe) {
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
- hwq_attr.stride = rq->wqe_size;
- hwq_attr.depth = qp->rq.max_wqe;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
if (rc)
- goto fail_sq;
-
- rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
- GFP_KERNEL);
- if (!rq->swq) {
- rc = -ENOMEM;
+ goto sq_swq;
+ rc = bnxt_qplib_alloc_init_swq(rq);
+ if (rc)
goto fail_rq;
- }
+ req.rq_size = cpu_to_le32(rq->max_wqe);
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT);
pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK);
req.rq_pg_size_rq_lvl = pg_sz_lvl;
- if (qp->rcq)
- req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ req.rq_fwo_rq_sge =
+ cpu_to_le16((rq->max_sge &
+ CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_RQ_SGE_SFT);
}
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
/* Header buffer - allow hdr_buf pass in */
rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
if (rc) {
rc = -ENOMEM;
- goto fail;
+ goto rq_rwq;
}
qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
req.qp_flags = cpu_to_le32(qp_flags);
- req.sq_size = cpu_to_le32(sq->hwq.max_elements);
- req.rq_size = cpu_to_le32(rq->hwq.max_elements);
-
- req.sq_fwo_sq_sge =
- cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
- CMDQ_CREATE_QP1_SQ_SGE_SFT);
- req.rq_fwo_rq_sge =
- cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
- CMDQ_CREATE_QP1_RQ_SGE_SFT);
-
req.pd_id = cpu_to_le32(qp->pd->id);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
@@ -886,10 +900,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
sq->dbinfo.hwq = &sq->hwq;
sq->dbinfo.xid = qp->id;
sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
if (rq->max_wqe) {
rq->dbinfo.hwq = &rq->hwq;
rq->dbinfo.xid = qp->id;
rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
}
rcfw->qp_tbl[qp->id].qp_id = qp->id;
rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
@@ -898,12 +914,14 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
fail:
bnxt_qplib_free_qp_hdr_buf(res, qp);
+rq_rwq:
+ kfree(rq->swq);
fail_rq:
bnxt_qplib_free_hwq(res, &rq->hwq);
- kfree(rq->swq);
+sq_swq:
+ kfree(sq->swq);
fail_sq:
bnxt_qplib_free_hwq(res, &sq->hwq);
- kfree(sq->swq);
exit:
return rc;
}
@@ -912,26 +930,18 @@ static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size)
{
struct bnxt_qplib_hwq *hwq;
struct bnxt_qplib_q *sq;
- u64 fpsne, psne, psn_pg;
- u16 indx_pad = 0, indx;
- u16 pg_num, pg_indx;
- u64 *page;
+ u64 fpsne, psn_pg;
+ u16 indx_pad = 0;
sq = &qp->sq;
hwq = &sq->hwq;
-
- fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg);
+ fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg);
if (!IS_ALIGNED(fpsne, PAGE_SIZE))
indx_pad = ALIGN(fpsne, PAGE_SIZE) / size;
- page = (u64 *)psn_pg;
- for (indx = 0; indx < hwq->max_elements; indx++) {
- pg_num = (indx + indx_pad) / (PAGE_SIZE / size);
- pg_indx = (indx + indx_pad) % (PAGE_SIZE / size);
- psne = page[pg_num] + pg_indx * size;
- sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne;
- sq->swq[indx].psn_search = (struct sq_psn_search *)psne;
- }
+ hwq->pad_pgofft = indx_pad;
+ hwq->pad_pg = (u64 *)psn_pg;
+ hwq->pad_stride = size;
}
int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
@@ -944,12 +954,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
struct creq_create_qp_resp resp;
int rc, req_size, psn_sz = 0;
struct bnxt_qplib_hwq *xrrq;
- u16 cmd_flags = 0, max_ssge;
struct bnxt_qplib_pbl *pbl;
struct cmdq_create_qp req;
+ u16 cmd_flags = 0;
u32 qp_flags = 0;
u8 pg_sz_lvl;
- u16 max_rsge;
+ u16 nsge;
RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
@@ -967,97 +977,78 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
- hwq_attr.stride = sq->wqe_size;
- hwq_attr.depth = sq->max_wqe;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq);
hwq_attr.aux_stride = psn_sz;
- hwq_attr.aux_depth = hwq_attr.depth;
+ hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
goto exit;
- sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
- if (!sq->swq) {
- rc = -ENOMEM;
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
goto fail_sq;
- }
if (psn_sz)
bnxt_qplib_init_psn_ptr(qp, psn_sz);
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
CMDQ_CREATE_QP_SQ_PG_SIZE_SFT);
pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK);
req.sq_pg_size_sq_lvl = pg_sz_lvl;
-
- if (qp->scq)
- req.scq_cid = cpu_to_le32(qp->scq->id);
+ req.sq_fwo_sq_sge =
+ cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
/* RQ */
- if (rq->max_wqe) {
+ if (!qp->srq) {
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
- hwq_attr.stride = rq->wqe_size;
- hwq_attr.depth = rq->max_wqe;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq);
hwq_attr.aux_stride = 0;
hwq_attr.aux_depth = 0;
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
if (rc)
- goto fail_sq;
-
- rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
- GFP_KERNEL);
- if (!rq->swq) {
- rc = -ENOMEM;
+ goto sq_swq;
+ rc = bnxt_qplib_alloc_init_swq(rq);
+ if (rc)
goto fail_rq;
- }
+
+ req.rq_size = cpu_to_le32(rq->max_wqe);
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
CMDQ_CREATE_QP_RQ_PG_SIZE_SFT);
pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK);
req.rq_pg_size_rq_lvl = pg_sz_lvl;
+ nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ 6 : rq->max_sge;
+ req.rq_fwo_rq_sge =
+ cpu_to_le16(((nsge &
+ CMDQ_CREATE_QP_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
} else {
/* SRQ */
- if (qp->srq) {
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
- req.srq_cid = cpu_to_le32(qp->srq->id);
- }
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
+ req.srq_cid = cpu_to_le32(qp->srq->id);
}
-
- if (qp->rcq)
- req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
if (qp->sig_type)
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+ if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
req.qp_flags = cpu_to_le32(qp_flags);
- req.sq_size = cpu_to_le32(sq->hwq.max_elements);
- req.rq_size = cpu_to_le32(rq->hwq.max_elements);
- qp->sq_hdr_buf = NULL;
- qp->rq_hdr_buf = NULL;
-
- rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
- if (rc)
- goto fail_rq;
-
- /* CTRL-22434: Irrespective of the requested SGE count on the SQ
- * always create the QP with max send sges possible if the requested
- * inline size is greater than 0.
- */
- max_ssge = qp->max_inline_data ? 6 : sq->max_sge;
- req.sq_fwo_sq_sge = cpu_to_le16(
- ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
- << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
- max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge;
- req.rq_fwo_rq_sge = cpu_to_le16(
- ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK)
- << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
/* ORRQ and IRRQ */
if (psn_sz) {
xrrq = &qp->orrq;
@@ -1078,7 +1069,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.type = HWQ_TYPE_CTX;
rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
if (rc)
- goto fail_buf_free;
+ goto rq_swq;
pbl = &xrrq->pbl[PBL_LVL_0];
req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
@@ -1113,30 +1104,29 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
sq->dbinfo.hwq = &sq->hwq;
sq->dbinfo.xid = qp->id;
sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
if (rq->max_wqe) {
rq->dbinfo.hwq = &rq->hwq;
rq->dbinfo.xid = qp->id;
rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
}
rcfw->qp_tbl[qp->id].qp_id = qp->id;
rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
return 0;
-
fail:
- if (qp->irrq.max_elements)
- bnxt_qplib_free_hwq(res, &qp->irrq);
+ bnxt_qplib_free_hwq(res, &qp->irrq);
fail_orrq:
- if (qp->orrq.max_elements)
- bnxt_qplib_free_hwq(res, &qp->orrq);
-fail_buf_free:
- bnxt_qplib_free_qp_hdr_buf(res, qp);
+ bnxt_qplib_free_hwq(res, &qp->orrq);
+rq_swq:
+ kfree(rq->swq);
fail_rq:
bnxt_qplib_free_hwq(res, &rq->hwq);
- kfree(rq->swq);
+sq_swq:
+ kfree(sq->swq);
fail_sq:
bnxt_qplib_free_hwq(res, &sq->hwq);
- kfree(sq->swq);
exit:
return rc;
}
@@ -1512,7 +1502,7 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
memset(sge, 0, sizeof(*sge));
if (qp->sq_hdr_buf) {
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ sw_prod = sq->swq_start;
sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map +
sw_prod * qp->sq_hdr_buf_size);
sge->lkey = 0xFFFFFFFF;
@@ -1526,7 +1516,7 @@ u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *rq = &qp->rq;
- return HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ return rq->swq_start;
}
dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index)
@@ -1543,7 +1533,7 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
memset(sge, 0, sizeof(*sge));
if (qp->rq_hdr_buf) {
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ sw_prod = rq->swq_start;
sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map +
sw_prod * qp->rq_hdr_buf_size);
sge->lkey = 0xFFFFFFFF;
@@ -1562,6 +1552,8 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
u32 flg_npsn;
u32 op_spsn;
+ if (!swq->psn_search)
+ return;
psns = swq->psn_search;
psns_ext = swq->psn_ext;
@@ -1575,12 +1567,122 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
psns_ext->opcode_start_psn = cpu_to_le32(op_spsn);
psns_ext->flags_next_psn = cpu_to_le32(flg_npsn);
+ psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx);
} else {
psns->opcode_start_psn = cpu_to_le32(op_spsn);
psns->flags_next_psn = cpu_to_le32(flg_npsn);
}
}
+static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u16 *idx)
+{
+ struct bnxt_qplib_hwq *hwq;
+ int len, t_len, offt;
+ bool pull_dst = true;
+ void *il_dst = NULL;
+ void *il_src = NULL;
+ int t_cplen, cplen;
+ int indx;
+
+ hwq = &qp->sq.hwq;
+ t_len = 0;
+ for (indx = 0; indx < wqe->num_sge; indx++) {
+ len = wqe->sg_list[indx].size;
+ il_src = (void *)wqe->sg_list[indx].addr;
+ t_len += len;
+ if (t_len > qp->max_inline_data)
+ goto bad;
+ while (len) {
+ if (pull_dst) {
+ pull_dst = false;
+ il_dst = bnxt_qplib_get_prod_qe(hwq, *idx);
+ (*idx)++;
+ t_cplen = 0;
+ offt = 0;
+ }
+ cplen = min_t(int, len, sizeof(struct sq_sge));
+ cplen = min_t(int, cplen,
+ (sizeof(struct sq_sge) - offt));
+ memcpy(il_dst, il_src, cplen);
+ t_cplen += cplen;
+ il_src += cplen;
+ il_dst += cplen;
+ offt += cplen;
+ len -= cplen;
+ if (t_cplen == sizeof(struct sq_sge))
+ pull_dst = true;
+ }
+ }
+
+ return t_len;
+bad:
+ return -ENOMEM;
+}
+
+static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_sge *ssge,
+ u16 nsge, u16 *idx)
+{
+ struct sq_sge *dsge;
+ int indx, len = 0;
+
+ for (indx = 0; indx < nsge; indx++, (*idx)++) {
+ dsge = bnxt_qplib_get_prod_qe(hwq, *idx);
+ dsge->va_or_pa = cpu_to_le64(ssge[indx].addr);
+ dsge->l_key = cpu_to_le32(ssge[indx].lkey);
+ dsge->size = cpu_to_le32(ssge[indx].size);
+ len += ssge[indx].size;
+ }
+
+ return len;
+}
+
+static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u16 *wqe_sz, u16 *qdf, u8 mode)
+{
+ u32 ilsize, bytes;
+ u16 nsge;
+ u16 slot;
+
+ nsge = wqe->num_sge;
+ /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. */
+ bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+ ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data);
+ bytes = ALIGN(ilsize, sizeof(struct sq_sge));
+ bytes += sizeof(struct sq_send_hdr);
+ }
+
+ *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes);
+ slot = bytes >> 4;
+ *wqe_sz = slot;
+ if (mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ slot = 8;
+ return slot;
+}
+
+static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq,
+ struct bnxt_qplib_swq *swq)
+{
+ struct bnxt_qplib_hwq *hwq;
+ u32 pg_num, pg_indx;
+ void *buff;
+ u32 tail;
+
+ hwq = &sq->hwq;
+ if (!hwq->pad_pg)
+ return;
+ tail = swq->slot_idx / sq->dbinfo.max_slot;
+ pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride);
+ pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride);
+ buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride);
+ swq->psn_ext = buff;
+ swq->psn_search = buff;
+}
+
void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *sq = &qp->sq;
@@ -1594,88 +1696,84 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_nq_work *nq_work = NULL;
int i, rc = 0, data_len = 0, pkt_num = 0;
struct bnxt_qplib_q *sq = &qp->sq;
- struct sq_send *hw_sq_send_hdr;
+ struct bnxt_qplib_hwq *hwq;
struct bnxt_qplib_swq *swq;
bool sch_handler = false;
- struct sq_sge *hw_sge;
- u8 wqe_size16;
+ u16 wqe_sz, qdf = 0;
+ void *base_hdr;
+ void *ext_hdr;
__le32 temp32;
- u32 sw_prod;
+ u32 wqe_idx;
+ u32 slots;
+ u16 idx;
- if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
- if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
- sch_handler = true;
- dev_dbg(&sq->hwq.pdev->dev,
- "%s Error QP. Scheduling for poll_cq\n",
- __func__);
- goto queue_err;
- }
+ hwq = &sq->hwq;
+ if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS &&
+ qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
}
- if (bnxt_qplib_queue_full(sq)) {
- dev_err(&sq->hwq.pdev->dev,
+ slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode);
+ if (bnxt_qplib_queue_full(sq, slots + qdf)) {
+ dev_err(&hwq->pdev->dev,
"prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
- sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
- sq->q_full_delta);
+ hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta);
rc = -ENOMEM;
goto done;
}
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
- swq = &sq->swq[sw_prod];
+
+ swq = bnxt_qplib_get_swqe(sq, &wqe_idx);
+ bnxt_qplib_pull_psn_buff(sq, swq);
+
+ idx = 0;
+ swq->slot_idx = hwq->prod;
+ swq->slots = slots;
swq->wr_id = wqe->wr_id;
swq->type = wqe->type;
swq->flags = wqe->flags;
+ swq->start_psn = sq->psn & BTH_PSN_MASK;
if (qp->sig_type)
swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
- swq->start_psn = sq->psn & BTH_PSN_MASK;
-
- hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL);
- memset(hw_sq_send_hdr, 0, sq->wqe_size);
- if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
- /* Copy the inline data */
- if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
- dev_warn(&sq->hwq.pdev->dev,
- "Inline data length > 96 detected\n");
- data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
- } else {
- data_len = wqe->inline_len;
- }
- memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len);
- wqe_size16 = (data_len + 15) >> 4;
- } else {
- for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data;
- i < wqe->num_sge; i++, hw_sge++) {
- hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
- hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
- hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
- data_len += wqe->sg_list[i].size;
- }
- /* Each SGE entry = 1 WQE size16 */
- wqe_size16 = wqe->num_sge;
- /* HW requires wqe size has room for atleast one SGE even if
- * none was supplied by ULP
- */
- if (!wqe->num_sge)
- wqe_size16++;
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
}
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
+
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE)
+ /* Copy the inline data */
+ data_len = bnxt_qplib_put_inline(qp, wqe, &idx);
+ else
+ data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge,
+ &idx);
+ if (data_len < 0)
+ goto queue_err;
/* Specifics */
switch (wqe->type) {
case BNXT_QPLIB_SWQE_TYPE_SEND:
if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) {
+ struct sq_send_raweth_qp1_hdr *sqe = base_hdr;
+ struct sq_raw_ext_hdr *ext_sqe = ext_hdr;
/* Assemble info for Raw Ethertype QPs */
- struct sq_send_raweth_qp1 *sqe =
- (struct sq_send_raweth_qp1 *)hw_sq_send_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->wqe_size = wqe_sz;
sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action);
sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags);
sqe->length = cpu_to_le32(data_len);
- sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
+ ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) <<
SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT);
@@ -1685,27 +1783,24 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
{
- struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr;
+ struct sq_ud_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_send_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
- sqe->inv_key_or_imm_data = cpu_to_le32(
- wqe->send.inv_key);
+ sqe->wqe_size = wqe_sz;
+ sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key);
if (qp->type == CMDQ_CREATE_QP_TYPE_UD ||
qp->type == CMDQ_CREATE_QP_TYPE_GSI) {
sqe->q_key = cpu_to_le32(wqe->send.q_key);
- sqe->dst_qp = cpu_to_le32(
- wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
sqe->length = cpu_to_le32(data_len);
- sqe->avid = cpu_to_le32(wqe->send.avid &
- SQ_SEND_AVID_MASK);
sq->psn = (sq->psn + 1) & BTH_PSN_MASK;
+ ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp &
+ SQ_SEND_DST_QP_MASK);
+ ext_sqe->avid = cpu_to_le32(wqe->send.avid &
+ SQ_SEND_AVID_MASK);
} else {
sqe->length = cpu_to_le32(data_len);
- sqe->dst_qp = 0;
- sqe->avid = 0;
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1718,16 +1813,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
{
- struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr;
+ struct sq_rdma_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_rdma_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->wqe_size = wqe_sz;
sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key);
sqe->length = cpu_to_le32((u32)data_len);
- sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
- sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
+ ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
+ ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1738,14 +1833,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
{
- struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr;
+ struct sq_atomic_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_atomic_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
sqe->remote_key = cpu_to_le32(wqe->atomic.r_key);
sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va);
- sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
- sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
+ ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
+ ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1755,8 +1851,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
{
- struct sq_localinvalidate *sqe =
- (struct sq_localinvalidate *)hw_sq_send_hdr;
+ struct sq_localinvalidate *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1766,7 +1861,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
{
- struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr;
+ struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_fr_pmr_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1790,14 +1886,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
wqe->frmr.pbl_ptr[i] = cpu_to_le64(
wqe->frmr.page_list[i] |
PTU_PTE_VALID);
- sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
- sqe->va = cpu_to_le64(wqe->frmr.va);
+ ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
+ ext_sqe->va = cpu_to_le64(wqe->frmr.va);
break;
}
case BNXT_QPLIB_SWQE_TYPE_BIND_MW:
{
- struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr;
+ struct sq_bind_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_bind_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1806,9 +1903,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
(wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0);
sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key);
sqe->l_key = cpu_to_le32(wqe->bind.r_key);
- sqe->va = cpu_to_le64(wqe->bind.va);
- temp32 = cpu_to_le32(wqe->bind.length);
- memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length));
+ ext_sqe->va = cpu_to_le64(wqe->bind.va);
+ ext_sqe->length_lo = cpu_to_le32(wqe->bind.length);
break;
}
default:
@@ -1817,23 +1913,11 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
goto done;
}
swq->next_psn = sq->psn & BTH_PSN_MASK;
- if (qp->type == CMDQ_CREATE_QP_TYPE_RC)
- bnxt_qplib_fill_psn_search(qp, wqe, swq);
+ bnxt_qplib_fill_psn_search(qp, wqe, swq);
queue_err:
- if (sch_handler) {
- /* Store the ULP info in the software structures */
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
- swq = &sq->swq[sw_prod];
- swq->wr_id = wqe->wr_id;
- swq->type = wqe->type;
- swq->flags = wqe->flags;
- if (qp->sig_type)
- swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
- swq->start_psn = sq->psn & BTH_PSN_MASK;
- }
- sq->hwq.prod++;
+ bnxt_qplib_swq_mod_start(sq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
qp->wqe_cnt++;
-
done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
@@ -1843,7 +1927,7 @@ done:
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else {
- dev_err(&sq->hwq.pdev->dev,
+ dev_err(&hwq->pdev->dev,
"FP: Failed to allocate SQ nq_work!\n");
rc = -ENOMEM;
}
@@ -1863,58 +1947,65 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
{
struct bnxt_qplib_nq_work *nq_work = NULL;
struct bnxt_qplib_q *rq = &qp->rq;
+ struct rq_wqe_hdr *base_hdr;
+ struct rq_ext_hdr *ext_hdr;
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_swq *swq;
bool sch_handler = false;
- struct sq_sge *hw_sge;
- struct rq_wqe *rqe;
- int i, rc = 0;
- u32 sw_prod;
+ u16 wqe_sz, idx;
+ u32 wqe_idx;
+ int rc = 0;
- if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
- sch_handler = true;
- dev_dbg(&rq->hwq.pdev->dev,
- "%s: Error QP. Scheduling for poll_cq\n", __func__);
- goto queue_err;
+ hwq = &rq->hwq;
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
}
- if (bnxt_qplib_queue_full(rq)) {
- dev_err(&rq->hwq.pdev->dev,
+
+ if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) {
+ dev_err(&hwq->pdev->dev,
"FP: QP (0x%x) RQ is full!\n", qp->id);
rc = -EINVAL;
goto done;
}
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
- rq->swq[sw_prod].wr_id = wqe->wr_id;
- rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL);
- memset(rqe, 0, rq->wqe_size);
+ swq = bnxt_qplib_get_swqe(rq, &wqe_idx);
+ swq->wr_id = wqe->wr_id;
+ swq->slots = rq->dbinfo.max_slot;
- /* Calculate wqe_size16 and data_len */
- for (i = 0, hw_sge = (struct sq_sge *)rqe->data;
- i < wqe->num_sge; i++, hw_sge++) {
- hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
- hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
- hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s: Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
}
- rqe->wqe_type = wqe->type;
- rqe->flags = wqe->flags;
- rqe->wqe_size = wqe->num_sge +
- ((offsetof(typeof(*rqe), data) + 15) >> 4);
- /* HW requires wqe size has room for atleast one SGE even if none
- * was supplied by ULP
- */
- if (!wqe->num_sge)
- rqe->wqe_size++;
-
- /* Supply the rqe->wr_id index to the wr_id_tbl for now */
- rqe->wr_id[0] = cpu_to_le32(sw_prod);
+ idx = 0;
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
+ wqe_sz = (sizeof(struct rq_wqe_hdr) +
+ wqe->num_sge * sizeof(struct sq_sge)) >> 4;
+ bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx);
+ if (!wqe->num_sge) {
+ struct sq_sge *sge;
+
+ sge = bnxt_qplib_get_prod_qe(hwq, idx++);
+ sge->size = 0;
+ wqe_sz++;
+ }
+ base_hdr->wqe_type = wqe->type;
+ base_hdr->flags = wqe->flags;
+ base_hdr->wqe_size = wqe_sz;
+ base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
queue_err:
- if (sch_handler) {
- /* Store the ULP info in the software structures */
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
- rq->swq[sw_prod].wr_id = wqe->wr_id;
- }
-
- rq->hwq.prod++;
+ bnxt_qplib_swq_mod_start(rq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
if (nq_work) {
@@ -1923,12 +2014,12 @@ queue_err:
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else {
- dev_err(&rq->hwq.pdev->dev,
+ dev_err(&hwq->pdev->dev,
"FP: Failed to allocate RQ nq_work!\n");
rc = -ENOMEM;
}
}
-done:
+
return rc;
}
@@ -2026,20 +2117,19 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
struct bnxt_qplib_cqe **pcqe, int *budget)
{
- u32 sw_prod, sw_cons;
struct bnxt_qplib_cqe *cqe;
+ u32 start, last;
int rc = 0;
/* Now complete all outstanding SQEs with FLUSHED_ERR */
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ start = sq->swq_start;
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_cons == sw_prod) {
+ last = sq->swq_last;
+ if (start == last)
break;
- }
/* Skip the FENCE WQE completions */
- if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) {
+ if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) {
bnxt_qplib_cancel_phantom_processing(qp);
goto skip_compl;
}
@@ -2047,16 +2137,17 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR;
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
- cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->wr_id = sq->swq[last].wr_id;
cqe->src_qp = qp->id;
- cqe->type = sq->swq[sw_cons].type;
+ cqe->type = sq->swq[last].type;
cqe++;
(*budget)--;
skip_compl:
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots);
+ sq->swq_last = sq->swq[last].next_idx;
}
*pcqe = cqe;
- if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod)
+ if (!(*budget) && sq->swq_last != start)
/* Out of budget */
rc = -EAGAIN;
@@ -2067,9 +2158,9 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
struct bnxt_qplib_cqe **pcqe, int *budget)
{
struct bnxt_qplib_cqe *cqe;
- u32 sw_prod, sw_cons;
- int rc = 0;
+ u32 start, last;
int opcode = 0;
+ int rc = 0;
switch (qp->type) {
case CMDQ_CREATE_QP1_TYPE_GSI:
@@ -2085,24 +2176,25 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
}
/* Flush the rest of the RQ */
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ start = rq->swq_start;
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq);
- if (sw_cons == sw_prod)
+ last = rq->swq_last;
+ if (last == start)
break;
memset(cqe, 0, sizeof(*cqe));
cqe->status =
CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR;
cqe->opcode = opcode;
cqe->qp_handle = (unsigned long)qp;
- cqe->wr_id = rq->swq[sw_cons].wr_id;
+ cqe->wr_id = rq->swq[last].wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots);
+ rq->swq_last = rq->swq[last].next_idx;
}
*pcqe = cqe;
- if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod)
+ if (!*budget && rq->swq_last != start)
/* Out of budget */
rc = -EAGAIN;
@@ -2125,7 +2217,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
* CQE is track from sw_cq_cons to max_element but valid only if VALID=1
*/
static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
- u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+ u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
{
u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
struct bnxt_qplib_q *sq = &qp->sq;
@@ -2138,7 +2230,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
/* Normal mode */
/* Check for the psn_search marking before completing */
- swq = &sq->swq[sw_sq_cons];
+ swq = &sq->swq[swq_last];
if (swq->psn_search &&
le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
/* Unmark */
@@ -2147,7 +2239,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
& ~0x80000000);
dev_dbg(&cq->hwq.pdev->dev,
"FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
- cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
sq->condition = true;
sq->send_phantom = true;
@@ -2184,9 +2276,10 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
le64_to_cpu
(peek_req_hwcqe->qp_handle));
peek_sq = &peek_qp->sq;
- peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
- peek_req_hwcqe->sq_cons_idx) - 1
- , &sq->hwq);
+ peek_sq_cons_idx =
+ ((le16_to_cpu(
+ peek_req_hwcqe->sq_cons_idx)
+ - 1) % sq->max_wqe);
/* If the hwcqe's sq's wr_id matches */
if (peek_sq == sq &&
sq->swq[peek_sq_cons_idx].wr_id ==
@@ -2214,7 +2307,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
}
dev_err(&cq->hwq.pdev->dev,
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
- cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
rc = -EINVAL;
}
out:
@@ -2226,11 +2319,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe **pcqe, int *budget,
u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
{
- u32 sw_sq_cons, cqe_sq_cons;
struct bnxt_qplib_swq *swq;
struct bnxt_qplib_cqe *cqe;
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq;
+ u32 cqe_sq_cons;
int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -2242,14 +2335,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
}
sq = &qp->sq;
- cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
- if (cqe_sq_cons > sq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
- cqe_sq_cons, sq->hwq.max_elements);
- return -EINVAL;
- }
-
+ cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
@@ -2261,12 +2347,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
*/
cqe = *pcqe;
while (*budget) {
- sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_sq_cons == cqe_sq_cons)
+ if (sq->swq_last == cqe_sq_cons)
/* Done */
break;
- swq = &sq->swq[sw_sq_cons];
+ swq = &sq->swq[sq->swq_last];
memset(cqe, 0, sizeof(*cqe));
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
@@ -2280,12 +2365,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
* of the request being signaled or not, it must complete with
* the hwcqe error status
*/
- if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
+ if (swq->next_idx == cqe_sq_cons &&
hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
- sw_sq_cons, cqe->wr_id, cqe->status);
+ sq->swq_last, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
bnxt_qplib_mark_qp_error(qp);
@@ -2293,7 +2378,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
bnxt_qplib_add_flush_qp(qp);
} else {
/* Before we complete, do WA 9060 */
- if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+ if (do_wa9060(qp, cq, cq_cons, sq->swq_last,
cqe_sq_cons)) {
*lib_qp = qp;
goto out;
@@ -2305,13 +2390,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
}
}
skip:
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots);
+ sq->swq_last = swq->next_idx;
if (sq->single)
break;
}
out:
*pcqe = cqe;
- if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
+ if (sq->swq_last != cqe_sq_cons) {
/* Out of budget */
rc = -EAGAIN;
goto done;
@@ -2386,17 +2472,23 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (wr_id_idx != rq->swq_last)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2467,18 +2559,24 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2569,17 +2667,23 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2601,7 +2705,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq, *rq;
struct bnxt_qplib_cqe *cqe;
- u32 sw_cons = 0, cqe_cons;
+ u32 swq_last = 0, cqe_cons;
int rc = 0;
/* Check the Status */
@@ -2627,13 +2731,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx);
if (cqe_cons == 0xFFFF)
goto do_rq;
-
- if (cqe_cons > sq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
- cqe_cons, sq->hwq.max_elements);
- goto do_rq;
- }
+ cqe_cons %= sq->max_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
@@ -2647,24 +2745,25 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
*/
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_cons == cqe_cons)
+ swq_last = sq->swq_last;
+ if (swq_last == cqe_cons)
break;
- if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
memset(cqe, 0, sizeof(*cqe));
cqe->status = CQ_REQ_STATUS_OK;
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
cqe->src_qp = qp->id;
- cqe->wr_id = sq->swq[sw_cons].wr_id;
- cqe->type = sq->swq[sw_cons].type;
+ cqe->wr_id = sq->swq[swq_last].wr_id;
+ cqe->type = sq->swq[swq_last].type;
cqe++;
(*budget)--;
}
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots);
+ sq->swq_last = sq->swq[swq_last].next_idx;
}
*pcqe = cqe;
- if (!(*budget) && sw_cons != cqe_cons) {
+ if (!(*budget) && swq_last != cqe_cons) {
/* Out of budget */
rc = -EAGAIN;
goto sq_done;
@@ -2676,10 +2775,10 @@ do_rq:
cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx);
if (cqe_cons == 0xFFFF) {
goto done;
- } else if (cqe_cons > rq->hwq.max_elements) {
+ } else if (cqe_cons > rq->max_wqe - 1) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
- cqe_cons, rq->hwq.max_elements);
+ cqe_cons, rq->max_wqe);
goto done;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 568ca390322c..f50784405e27 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -39,6 +39,51 @@
#ifndef __BNXT_QPLIB_FP_H__
#define __BNXT_QPLIB_FP_H__
+/* Few helper structures temporarily defined here
+ * should get rid of these when roce_hsi.h is updated
+ * in original code base
+ */
+struct sq_ud_ext_hdr {
+ __le32 dst_qp;
+ __le32 avid;
+ __le64 rsvd;
+};
+
+struct sq_raw_ext_hdr {
+ __le32 cfa_meta;
+ __le32 rsvd0;
+ __le64 rsvd1;
+};
+
+struct sq_rdma_ext_hdr {
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 rsvd;
+};
+
+struct sq_atomic_ext_hdr {
+ __le64 swap_data;
+ __le64 cmp_data;
+};
+
+struct sq_fr_pmr_ext_hdr {
+ __le64 pblptr;
+ __le64 va;
+};
+
+struct sq_bind_ext_hdr {
+ __le64 va;
+ __le32 length_lo;
+ __le32 length_hi;
+};
+
+struct rq_ext_hdr {
+ __le64 rsvd1;
+ __le64 rsvd2;
+};
+
+/* Helper structures end */
+
struct bnxt_qplib_srq {
struct bnxt_qplib_pd *pd;
struct bnxt_qplib_dpi *dpi;
@@ -74,6 +119,8 @@ struct bnxt_qplib_swq {
u8 flags;
u32 start_psn;
u32 next_psn;
+ u32 slot_idx;
+ u8 slots;
struct sq_psn_search *psn_search;
struct sq_psn_search_ext *psn_ext;
};
@@ -213,6 +260,8 @@ struct bnxt_qplib_q {
u32 phantom_cqe_cnt;
u32 next_cq_cons;
bool flushed;
+ u32 swq_start;
+ u32 swq_last;
};
struct bnxt_qplib_qp {
@@ -224,9 +273,10 @@ struct bnxt_qplib_qp {
u32 id;
u8 type;
u8 sig_type;
- u32 modify_flags;
+ u8 wqe_mode;
u8 state;
u8 cur_qp_state;
+ u64 modify_flags;
u32 max_inline_data;
u32 mtu;
u8 path_mtu;
@@ -300,11 +350,18 @@ struct bnxt_qplib_qp {
(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
!((raw_cons) & (cp_bit)))
-static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
+ u8 slots)
{
- return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
- &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
- &qplib_q->hwq);
+ struct bnxt_qplib_hwq *hwq;
+ int avail;
+
+ hwq = &que->hwq;
+ /* False full is possible, retrying post-send makes sense */
+ avail = hwq->cons - hwq->prod;
+ if (hwq->cons <= hwq->prod)
+ avail += hwq->depth;
+ return avail <= slots;
}
struct bnxt_qplib_cqe {
@@ -489,4 +546,64 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe *cqe,
int num_cqes);
void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp);
+
+static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx)
+{
+ u32 idx;
+
+ idx = que->swq_start;
+ if (swq_idx)
+ *swq_idx = idx;
+ return &que->swq[idx];
+}
+
+static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx)
+{
+ que->swq_start = que->swq[idx].next_idx;
+}
+
+static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que)
+{
+ return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge);
+}
+
+static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ que->max_wqe : bnxt_qplib_get_depth(que);
+}
+
+static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ sizeof(struct sq_send) / sizeof(struct sq_sge) : 1;
+}
+
+static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size)
+{
+ return (wqe_size / sizeof(struct sq_sge));
+}
+
+static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes)
+{
+ /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128
+ * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128.
+ * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512.
+ * when 8916 is disabled.
+ */
+ return (delta * wqe_bytes) / sizeof(struct sq_sge);
+}
+
+static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max)
+{
+ u16 size = 0;
+ int indx;
+
+ for (indx = 0; indx < wqe->num_sge; indx++)
+ size += wqe->sg_list[indx].size;
+ if (size > max)
+ size = max;
+
+ return size;
+}
#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index c29cbd3a2d7b..9da470d1e4a3 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -41,6 +41,28 @@
extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
+#define CHIP_NUM_57508 0x1750
+#define CHIP_NUM_57504 0x1751
+#define CHIP_NUM_57502 0x1752
+
+enum bnxt_qplib_wqe_mode {
+ BNXT_QPLIB_WQE_MODE_STATIC = 0x00,
+ BNXT_QPLIB_WQE_MODE_VARIABLE = 0x01,
+ BNXT_QPLIB_WQE_MODE_INVALID = 0x02
+};
+
+struct bnxt_qplib_drv_modes {
+ u8 wqe_mode;
+ /* Other modes to follow here */
+};
+
+struct bnxt_qplib_chip_ctx {
+ u16 chip_num;
+ u8 chip_rev;
+ u8 chip_metal;
+ struct bnxt_qplib_drv_modes modes;
+};
+
#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
@@ -141,6 +163,9 @@ struct bnxt_qplib_hwq {
u32 cons; /* raw */
u8 cp_bit;
u8 is_user;
+ u64 *pad_pg;
+ u32 pad_stride;
+ u32 pad_pgofft;
};
struct bnxt_qplib_db_info {
@@ -148,6 +173,7 @@ struct bnxt_qplib_db_info {
void __iomem *priv_db;
struct bnxt_qplib_hwq *hwq;
u32 xid;
+ u32 max_slot;
};
/* Tables */
@@ -230,16 +256,6 @@ struct bnxt_qplib_ctx {
u64 hwrm_intf_ver;
};
-struct bnxt_qplib_chip_ctx {
- u16 chip_num;
- u8 chip_rev;
- u8 chip_metal;
-};
-
-#define CHIP_NUM_57508 0x1750
-#define CHIP_NUM_57504 0x1751
-#define CHIP_NUM_57502 0x1752
-
struct bnxt_qplib_res {
struct pci_dev *pdev;
struct bnxt_qplib_chip_ctx *cctx;
@@ -317,6 +333,14 @@ static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq,
return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
}
+static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx)
+{
+ idx += hwq->prod;
+ if (idx >= hwq->depth)
+ idx -= hwq->depth;
+ return bnxt_qplib_get_qe(hwq, idx, NULL);
+}
+
#define to_bnxt_qplib(ptr, type, member) \
container_of(ptr, type, member)
@@ -351,6 +375,17 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
struct bnxt_qplib_ctx *ctx,
bool virt_fn, bool is_p5);
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt)
+{
+ hwq->prod = (hwq->prod + cnt) % hwq->depth;
+}
+
+static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq,
+ u32 cnt)
+{
+ hwq->cons = (hwq->cons + cnt) % hwq->depth;
+}
+
static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
bool arm)
{
@@ -383,8 +418,7 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info,
key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
key <<= 32;
- key |= (info->hwq->prod & (info->hwq->max_elements - 1)) &
- DBC_DBC_INDEX_MASK;
+ key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK;
writeq(key, info->db);
}
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 6f00f07420b7..3e40e0d76efd 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -1126,6 +1126,7 @@ struct cmdq_create_qp {
#define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL
#define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
#define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL
u8 type;
#define CMDQ_CREATE_QP_TYPE_RC 0x2UL
#define CMDQ_CREATE_QP_TYPE_UD 0x4UL
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 30e08bcc9afb..77bc02a9228e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3282,7 +3282,7 @@ static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
{
- struct in6_addr uninitialized_var(addr);
+ struct in6_addr addr;
struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index b1bb61c65f4f..352b8af1998a 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -754,7 +754,7 @@ skip_cqe:
static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
struct ib_wc *wc, struct c4iw_srq *srq)
{
- struct t4_cqe uninitialized_var(cqe);
+ struct t4_cqe cqe;
struct t4_wq *wq = qhp ? &qhp->wq : NULL;
u32 credit = 0;
u8 cqe_flushed;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index e8e11bd95e42..2b2b009b371a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -980,7 +980,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
void c4iw_qp_rem_ref(struct ib_qp *qp);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int c4iw_dealloc_mw(struct ib_mw *mw);
@@ -1053,8 +1053,9 @@ int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
-typedef int c4iw_restrack_func(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
-extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX];
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr);
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq);
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp);
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id);
#endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 962dc97a8ff2..73936c3341b7 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -399,7 +399,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
mhp->ibmr.length = mhp->attr.len;
- mhp->ibmr.iova = mhp->attr.va_fbo;
mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
@@ -691,7 +690,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
}
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index ba83d942997c..6c579d2d3997 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -236,14 +236,6 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
return 0;
}
-static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- pr_debug("ibdev %p\n", ibdev);
- *pkey = 0;
- return 0;
-}
-
static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@@ -317,7 +309,6 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_DEVICE_MGMT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
props->max_msg_sz = -1;
return ret;
@@ -439,7 +430,6 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
@@ -458,13 +448,6 @@ static void get_dev_fw_str(struct ib_device *dev, char *str)
FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
}
-static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
-{
- return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) &&
- c4iw_restrack_funcs[res->type]) ?
- c4iw_restrack_funcs[res->type](msg, res) : 0;
-}
-
static const struct ib_device_ops c4iw_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_CXGB4,
@@ -485,7 +468,9 @@ static const struct ib_device_ops c4iw_dev_ops = {
.destroy_cq = c4iw_destroy_cq,
.destroy_qp = c4iw_destroy_qp,
.destroy_srq = c4iw_destroy_srq,
- .fill_res_entry = fill_res_entry,
+ .fill_res_cq_entry = c4iw_fill_res_cq_entry,
+ .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry,
+ .fill_res_mr_entry = c4iw_fill_res_mr_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = c4iw_get_dma_mr,
.get_hw_stats = c4iw_get_mib,
@@ -508,7 +493,6 @@ static const struct ib_device_ops c4iw_dev_ops = {
.post_srq_recv = c4iw_post_srq_recv,
.query_device = c4iw_query_device,
.query_gid = c4iw_query_gid,
- .query_pkey = c4iw_query_pkey,
.query_port = c4iw_query_port,
.query_qp = c4iw_ib_query_qp,
.reg_user_mr = c4iw_reg_user_mr,
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
index f82d46ed969d..b32e6516d65f 100644
--- a/drivers/infiniband/hw/cxgb4/restrack.c
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -134,10 +134,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_qp_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
{
- struct ib_qp *ibqp = container_of(res, struct ib_qp, res);
struct t4_swsqe *fsp = NULL, *lsp = NULL;
struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
u16 first_sq_idx = 0, last_sq_idx = 0;
@@ -195,10 +193,9 @@ union union_ep {
struct c4iw_ep ep;
};
-static int fill_res_ep_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
+ struct rdma_cm_id *cm_id)
{
- struct rdma_cm_id *cm_id = rdma_res_to_id(res);
struct nlattr *table_attr;
struct c4iw_ep_common *epcp;
struct c4iw_listen_ep *listen_ep = NULL;
@@ -372,10 +369,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_cq_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq)
{
- struct ib_cq *ibcq = container_of(res, struct ib_cq, res);
struct c4iw_cq *chp = to_c4iw_cq(ibcq);
struct nlattr *table_attr;
struct t4_cqe hwcqes[2];
@@ -433,10 +428,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
struct c4iw_dev *dev = mhp->rhp;
u32 stag = mhp->attr.stag;
@@ -492,10 +485,3 @@ err_cancel_table:
err:
return -EMSGSIZE;
}
-
-c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = {
- [RDMA_RESTRACK_QP] = fill_res_qp_entry,
- [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry,
- [RDMA_RESTRACK_CQ] = fill_res_cq_entry,
- [RDMA_RESTRACK_MR] = fill_res_mr_entry,
-};
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index bef2bd291054..5484b08bbc5d 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -606,8 +606,8 @@ struct efa_admin_feature_queue_attr_desc {
/* Number of sub-CQs to be created for each CQ */
u16 sub_cqs_per_cq;
- /* MBZ */
- u16 reserved;
+ /* Minimum number of WQEs per SQ */
+ u16 min_sq_depth;
/* Maximum number of SGEs (buffers) allowed for a single send WQE */
u16 max_wr_send_sges;
@@ -632,6 +632,17 @@ struct efa_admin_feature_queue_attr_desc {
/* Maximum number of SGEs for a single RDMA read WQE */
u16 max_wr_rdma_sges;
+
+ /*
+ * Maximum number of bytes that can be written to SQ between two
+ * consecutive doorbells (in units of 64B). Driver must ensure that only
+ * complete WQEs are written to queue before issuing a doorbell.
+ * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can
+ * be written to SQ between two consecutive doorbells. max_tx_batch=11
+ * and WQE size = 128B, means up to 5 WQEs can be written to SQ between
+ * two consecutive doorbells. Zero means unlimited.
+ */
+ u16 max_tx_batch;
};
struct efa_admin_feature_aenq_desc {
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index fabd8df2e78f..6ac23627f65a 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -480,6 +480,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->max_llq_size = resp.u.queue_attr.max_llq_size;
result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges;
+ result->max_tx_batch = resp.u.queue_attr.max_tx_batch;
+ result->min_sq_depth = resp.u.queue_attr.min_sq_depth;
err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR);
if (err) {
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 41ce4a476ee6..190bac23f585 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -127,6 +127,8 @@ struct efa_com_get_device_attr_result {
u16 max_sq_sge;
u16 max_rq_sge;
u16 max_wr_rdma_sge;
+ u16 max_tx_batch;
+ u16 min_sq_depth;
u8 db_bar;
};
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 82145574c928..92d701146320 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -12,10 +12,12 @@
#include "efa.h"
-#define PCI_DEV_ID_EFA_VF 0xefa0
+#define PCI_DEV_ID_EFA0_VF 0xefa0
+#define PCI_DEV_ID_EFA1_VF 0xefa1
static const struct pci_device_id efa_pci_tbl[] = {
- { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
{ }
};
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 08313f7c73bc..9e201f169289 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -212,6 +212,7 @@ int efa_query_device(struct ib_device *ibdev,
props->max_send_sge = dev_attr->max_sq_sge;
props->max_recv_sge = dev_attr->max_rq_sge;
props->max_sge_rd = dev_attr->max_wr_rdma_sge;
+ props->max_pkeys = 1;
if (udata && udata->outlen) {
resp.max_sq_sge = dev_attr->max_sq_sge;
@@ -1501,11 +1502,39 @@ static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
return efa_com_dealloc_uar(&dev->edev, &params);
}
+#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
+ (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
+ NULL : #_attr)
+
+static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
+ const struct efa_ibv_alloc_ucontext_cmd *cmd)
+{
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ char *attr_str;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
+ goto err;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
+ attr_str))
+ goto err;
+
+ return 0;
+
+err:
+ ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
+ attr_str);
+ return -EOPNOTSUPP;
+}
+
int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
struct efa_ibv_alloc_ucontext_resp resp = {};
+ struct efa_ibv_alloc_ucontext_cmd cmd = {};
struct efa_com_alloc_uar_result result;
int err;
@@ -1514,6 +1543,18 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
* we will ack input fields in our response.
*/
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Cannot copy udata for alloc_ucontext\n");
+ goto err_out;
+ }
+
+ err = efa_user_comp_handshake(ibucontext, &cmd);
+ if (err)
+ goto err_out;
+
err = efa_com_alloc_uar(&dev->edev, &result);
if (err)
goto err_out;
@@ -1525,6 +1566,8 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
resp.inline_buf_size = dev->dev_attr.inline_buf_size;
resp.max_llq_size = dev->dev_attr.max_llq_size;
+ resp.max_tx_batch = dev->dev_attr.max_tx_batch;
+ resp.min_sq_wr = dev->dev_attr.min_sq_depth;
if (udata && udata->outlen) {
err = ib_copy_to_udata(udata, &resp,
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 15f9c635f292..7eaf99538216 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -7317,11 +7317,11 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
case 1: return OPA_LINK_WIDTH_1X;
case 2: return OPA_LINK_WIDTH_2X;
case 3: return OPA_LINK_WIDTH_3X;
+ case 4: return OPA_LINK_WIDTH_4X;
default:
dd_dev_info(dd, "%s: invalid width %d, using 4\n",
__func__, width);
- /* fall through */
- case 4: return OPA_LINK_WIDTH_4X;
+ return OPA_LINK_WIDTH_4X;
}
}
@@ -7376,12 +7376,13 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
case 0:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
break;
+ case 1:
+ dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
+ break;
default:
dd_dev_err(dd,
"%s: unexpected max rate %d, using 25Gb\n",
__func__, (int)max_rate);
- /* fall through */
- case 1:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
break;
}
@@ -12878,11 +12879,6 @@ bail:
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
{
switch (chip_lstate) {
- default:
- dd_dev_err(dd,
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
- chip_lstate);
- /* fall through */
case LSTATE_DOWN:
return IB_PORT_DOWN;
case LSTATE_INIT:
@@ -12891,6 +12887,11 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
return IB_PORT_ARMED;
case LSTATE_ACTIVE:
return IB_PORT_ACTIVE;
+ default:
+ dd_dev_err(dd,
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+ chip_lstate);
+ return IB_PORT_DOWN;
}
}
@@ -12898,10 +12899,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
{
/* look at the HFI meta-states only */
switch (chip_pstate & 0xf0) {
- default:
- dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
- chip_pstate);
- /* fall through */
case PLS_DISABLED:
return IB_PORTPHYSSTATE_DISABLED;
case PLS_OFFLINE:
@@ -12914,6 +12911,10 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
return IB_PORTPHYSSTATE_LINKUP;
case PLS_PHYTEST:
return IB_PORTPHYSSTATE_PHY_TEST;
+ default:
+ dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
+ chip_pstate);
+ return IB_PORTPHYSSTATE_DISABLED;
}
}
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 4633a0ce1a8c..2ced236e1553 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -985,15 +985,10 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
{
struct hfi1_pportdata *ppd;
- int ret;
ppd = private2ppd(fp);
- ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
- if (ret) /* failed - release the module */
- module_put(THIS_MODULE);
-
- return ret;
+ return acquire_chip_resource(ppd->dd, i2c_target(target), 0);
}
static int i2c1_debugfs_open(struct inode *in, struct file *fp)
@@ -1013,7 +1008,6 @@ static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target)
ppd = private2ppd(fp);
release_chip_resource(ppd->dd, i2c_target(target));
- module_put(THIS_MODULE);
return 0;
}
@@ -1031,18 +1025,10 @@ static int i2c2_debugfs_release(struct inode *in, struct file *fp)
static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target)
{
struct hfi1_pportdata *ppd;
- int ret;
-
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
ppd = private2ppd(fp);
- ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
- if (ret) /* failed - release the module */
- module_put(THIS_MODULE);
-
- return ret;
+ return acquire_chip_resource(ppd->dd, i2c_target(target), 0);
}
static int qsfp1_debugfs_open(struct inode *in, struct file *fp)
@@ -1062,7 +1048,6 @@ static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target)
ppd = private2ppd(fp);
release_chip_resource(ppd->dd, i2c_target(target));
- module_put(THIS_MODULE);
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 2b57ba70ddd6..0e83d4b61e46 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1868,11 +1868,8 @@ int parse_platform_config(struct hfi1_devdata *dd)
2;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
pcfgcache->config_tables[table_type].num_table =
table_length_dwords;
@@ -1890,15 +1887,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* metadata table */
switch (table_type) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
@@ -2027,15 +2019,10 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
switch (table) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
if (field && field < platform_config_table_limits[table])
src_ptr =
@@ -2138,11 +2125,8 @@ int get_platform_config_field(struct hfi1_devdata *dd,
pcfgcache->config_tables[table_type].table;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
src_ptr = pcfgcache->config_tables[table_type].table;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 5eed4360695f..cb7ad1288821 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -831,6 +831,29 @@ wq_error:
}
/**
+ * destroy_workqueues - destroy per port workqueues
+ * @dd: the hfi1_ib device
+ */
+static void destroy_workqueues(struct hfi1_devdata *dd)
+{
+ int pidx;
+ struct hfi1_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+
+ if (ppd->hfi1_wq) {
+ destroy_workqueue(ppd->hfi1_wq);
+ ppd->hfi1_wq = NULL;
+ }
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
+ }
+}
+
+/**
* enable_general_intr() - Enable the IRQs that will be handled by the
* general interrupt handler.
* @dd: valid devdata
@@ -1103,15 +1126,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
* We can't count on interrupts since we are stopping.
*/
hfi1_quiet_serdes(ppd);
-
- if (ppd->hfi1_wq) {
- destroy_workqueue(ppd->hfi1_wq);
- ppd->hfi1_wq = NULL;
- }
- if (ppd->link_wq) {
- destroy_workqueue(ppd->link_wq);
- ppd->link_wq = NULL;
- }
+ if (ppd->hfi1_wq)
+ flush_workqueue(ppd->hfi1_wq);
+ if (ppd->link_wq)
+ flush_workqueue(ppd->link_wq);
}
sdma_exit(dd);
}
@@ -1756,6 +1774,7 @@ static void remove_one(struct pci_dev *pdev)
* clear dma engines, etc.
*/
shutdown_device(dd);
+ destroy_workqueues(dd);
stop_timers(dd);
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 07847cb72169..d580aa17ae37 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -399,7 +399,7 @@ static inline void iowait_get_priority(struct iowait *w)
* @wait_head: the wait queue
*
* This function is called to insert an iowait struct into a
- * wait queue after a resource (eg, sdma decriptor or pio
+ * wait queue after a resource (eg, sdma descriptor or pio
* buffer) is run out.
*/
static inline void iowait_queue(bool pkts_sent, struct iowait *w,
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
index 185c9b02c974..b8c9d0a003fb 100644
--- a/drivers/infiniband/hw/hfi1/ipoib.h
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -67,6 +67,9 @@ struct hfi1_ipoib_circ_buf {
* @sde: sdma engine
* @tx_list: tx request list
* @sent_txreqs: count of txreqs posted to sdma
+ * @stops: count of stops of queue
+ * @ring_full: ring has been filled
+ * @no_desc: descriptor shortage seen
* @flow: tracks when list needs to be flushed for a flow change
* @q_idx: ipoib Tx queue index
* @pkts_sent: indicator packets have been sent from this queue
@@ -80,6 +83,9 @@ struct hfi1_ipoib_txq {
struct sdma_engine *sde;
struct list_head tx_list;
u64 sent_txreqs;
+ atomic_t stops;
+ atomic_t ring_full;
+ atomic_t no_desc;
union hfi1_ipoib_flow flow;
u8 q_idx;
bool pkts_sent;
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index 883cb9d48022..9df292b51a05 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -55,23 +55,48 @@ static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
return sent - completed;
}
-static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
+static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
{
- if (unlikely(hfi1_ipoib_txreqs(++txq->sent_txreqs,
- atomic64_read(&txq->complete_txreqs)) >=
- min_t(unsigned int, txq->priv->netdev->tx_queue_len,
- txq->tx_ring.max_items - 1)))
+ return hfi1_ipoib_txreqs(txq->sent_txreqs,
+ atomic64_read(&txq->complete_txreqs));
+}
+
+static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
+{
+ if (atomic_inc_return(&txq->stops) == 1)
netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
}
+static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
+{
+ if (atomic_dec_and_test(&txq->stops))
+ netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items - 1);
+}
+
+static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items) >> 1;
+}
+
+static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
+{
+ ++txq->sent_txreqs;
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
+ !atomic_xchg(&txq->ring_full, 1))
+ hfi1_ipoib_stop_txq(txq);
+}
+
static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
{
struct net_device *dev = txq->priv->netdev;
- /* If the queue is already running just return */
- if (likely(!__netif_subqueue_stopped(dev, txq->q_idx)))
- return;
-
/* If shutting down just return as queue state is irrelevant */
if (unlikely(dev->reg_state != NETREG_REGISTERED))
return;
@@ -86,11 +111,9 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
* Use the minimum of the current tx_queue_len or the rings max txreqs
* to protect against ring overflow.
*/
- if (hfi1_ipoib_txreqs(txq->sent_txreqs,
- atomic64_read(&txq->complete_txreqs))
- < min_t(unsigned int, dev->tx_queue_len,
- txq->tx_ring.max_items) >> 1)
- netif_wake_subqueue(dev, txq->q_idx);
+ if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
+ atomic_xchg(&txq->ring_full, 0))
+ hfi1_ipoib_wake_txq(txq);
}
static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
@@ -364,11 +387,12 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
if (unlikely(!tx))
return ERR_PTR(-ENOMEM);
- /* so that we can test if the sdma decriptors are there */
+ /* so that we can test if the sdma descriptors are there */
tx->txreq.num_desc = 0;
tx->priv = priv;
tx->txq = txp->txq;
tx->skb = skb;
+ INIT_LIST_HEAD(&tx->txreq.list);
hfi1_ipoib_build_ib_tx_headers(tx, txp);
@@ -469,6 +493,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
ret = hfi1_ipoib_submit_tx(txq, tx);
if (likely(!ret)) {
+tx_ok:
trace_sdma_output_ibhdr(tx->priv->dd,
&tx->sdma_hdr.hdr,
ib_is_sc5(txp->flow.sc5));
@@ -478,20 +503,8 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev,
txq->pkts_sent = false;
- if (ret == -EBUSY) {
- list_add_tail(&tx->txreq.list, &txq->tx_list);
-
- trace_sdma_output_ibhdr(tx->priv->dd,
- &tx->sdma_hdr.hdr,
- ib_is_sc5(txp->flow.sc5));
- hfi1_ipoib_check_queue_depth(txq);
- return NETDEV_TX_OK;
- }
-
- if (ret == -ECOMM) {
- hfi1_ipoib_check_queue_depth(txq);
- return NETDEV_TX_OK;
- }
+ if (ret == -EBUSY || ret == -ECOMM)
+ goto tx_ok;
sdma_txclean(priv->dd, &tx->txreq);
dev_kfree_skb_any(skb);
@@ -509,9 +522,17 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev,
struct ipoib_txreq *tx;
/* Has the flow change ? */
- if (txq->flow.as_int != txp->flow.as_int)
- (void)hfi1_ipoib_flush_tx_list(dev, txq);
-
+ if (txq->flow.as_int != txp->flow.as_int) {
+ int ret;
+
+ ret = hfi1_ipoib_flush_tx_list(dev, txq);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY)
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+ }
tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
if (IS_ERR(tx)) {
int ret = PTR_ERR(tx);
@@ -610,10 +631,14 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
return -EAGAIN;
}
- netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
-
- if (list_empty(&txq->wait.list))
+ if (list_empty(&txreq->list))
+ /* came from non-list submit */
+ list_add_tail(&txreq->list, &txq->tx_list);
+ if (list_empty(&txq->wait.list)) {
+ if (!atomic_xchg(&txq->no_desc, 1))
+ hfi1_ipoib_stop_txq(txq);
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+ }
write_sequnlock(&sde->waitlock);
return -EBUSY;
@@ -648,9 +673,9 @@ static void hfi1_ipoib_flush_txq(struct work_struct *work)
struct net_device *dev = txq->priv->netdev;
if (likely(dev->reg_state == NETREG_REGISTERED) &&
- likely(__netif_subqueue_stopped(dev, txq->q_idx)) &&
likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
- netif_wake_subqueue(dev, txq->q_idx);
+ if (atomic_xchg(&txq->no_desc, 0))
+ hfi1_ipoib_wake_txq(txq);
}
int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
@@ -704,6 +729,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
txq->sde = NULL;
INIT_LIST_HEAD(&txq->tx_list);
atomic64_set(&txq->complete_txreqs, 0);
+ atomic_set(&txq->stops, 0);
+ atomic_set(&txq->ring_full, 0);
+ atomic_set(&txq->no_desc, 0);
txq->q_idx = i;
txq->flow.tx_queue = 0xff;
txq->flow.sc5 = 0xff;
@@ -769,7 +797,7 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
atomic64_inc(complete_txreqs);
}
- if (hfi1_ipoib_txreqs(txq->sent_txreqs, atomic64_read(complete_txreqs)))
+ if (hfi1_ipoib_used(txq))
dd_dev_warn(txq->priv->dd,
"txq %d not empty found %llu requests\n",
txq->q_idx,
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 7073f237a949..3222e3acb79c 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -721,7 +721,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
/* Bad mkey not a violation below level 2 */
if (ibp->rvp.mkeyprot < 2)
break;
- /* fall through */
+ fallthrough;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (ibp->rvp.mkey_violations != 0xFFFF)
@@ -1272,7 +1272,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
case IB_PORT_NOP:
if (phys_state == IB_PORTPHYSSTATE_NOP)
break;
- /* FALLTHROUGH */
+ fallthrough;
case IB_PORT_DOWN:
if (phys_state == IB_PORTPHYSSTATE_NOP) {
link_state = HLS_DN_DOWNDEF;
@@ -2300,7 +2300,6 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
* can be changed from the default values
*/
case OPA_VLARB_PREEMPT_ELEMENTS:
- /* FALLTHROUGH */
case OPA_VLARB_PREEMPT_MATRIX:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
break;
@@ -4170,7 +4169,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4240,7 +4239,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 63688e85e8da..6d263c9749b3 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -373,7 +373,7 @@ void hfi1_netdev_free(struct hfi1_devdata *dd)
{
if (dd->dummy_netdev) {
dd_dev_info(dd, "hfi1 netdev freed\n");
- free_netdev(dd->dummy_netdev);
+ kfree(dd->dummy_netdev);
dd->dummy_netdev = NULL;
}
}
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 1a6268d61977..18d32f053d26 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -306,7 +306,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
@@ -334,10 +334,14 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
-/* restore command and BARs after a reset has wiped them out */
+/**
+ * Restore command and BARs after a reset has wiped them out
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int restore_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
if (ret)
@@ -386,13 +390,17 @@ int restore_pci_variables(struct hfi1_devdata *dd)
error:
dd_dev_err(dd, "Unable to write to PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
-/* Save BARs and command to rewrite after device reset */
+/**
+ * Save BARs and command to rewrite after device reset
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int save_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
&dd->pcibar0);
@@ -441,7 +449,7 @@ int save_pci_variables(struct hfi1_devdata *dd)
error:
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
/*
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 79126b2b14ab..ff864f6f0266 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -86,7 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
switch (op) {
case PSC_GLOBAL_ENABLE:
reg |= SEND_CTRL_SEND_ENABLE_SMASK;
- /* Fall through */
+ fallthrough;
case PSC_DATA_VL_ENABLE:
mask = 0;
for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 03024cec78dd..b12e4665c9ab 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -191,22 +191,22 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 6:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 5:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 4:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 3:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 2:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 1:
*dest++ = *src++;
/* fall through */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 36593f2efe26..4642d6ceb890 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -668,8 +668,8 @@ static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
/* active optical cables only */
switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
/* active AOC */
power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
@@ -899,8 +899,8 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_PASSIVE_TUNING;
break;
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
ptr_total_atten);
@@ -909,7 +909,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_ACTIVE_TUNING;
break;
- case 0xD: /* fallthrough */
+ case 0xD: fallthrough;
case 0xF:
default:
dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 0c2ae9f7b3e8..356518e17fa6 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -195,7 +195,7 @@ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
/* Constraining 10KB packets to 8KB packets */
if (mtu == (enum ib_mtu)OPA_MTU_10240)
- mtu = OPA_MTU_8192;
+ mtu = (enum ib_mtu)OPA_MTU_8192;
return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
@@ -312,7 +312,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
- /* fall through */
+ fallthrough;
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
@@ -367,7 +367,10 @@ bool _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if (dd->flags & HFI1_SHUTDOWN)
+ return true;
return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b670321365d3..b0d053d12129 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -113,20 +113,6 @@ static inline void clear_ahg(struct rvt_qp *qp)
}
/**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-
-/**
* hfi1_qp_wakeup - wake up on the indicated event
* @qp: the QP
* @flag: flag the qp on which the qp is stalled
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index b5966991d647..8386c84c2d92 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -231,7 +231,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -279,7 +279,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index f1734e5e9ac4..1bb5f57152d3 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -141,7 +141,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
release_rdma_sge_mr(e);
- /* FALLTHROUGH */
+ fallthrough;
case OP(ATOMIC_ACKNOWLEDGE):
/*
* We can increment the tail pointer now that the last
@@ -160,7 +160,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_acked_ack_queue = next;
qp->s_tail_ack_queue = next;
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
@@ -267,7 +267,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_READ_RESPONSE_MIDDLE):
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
@@ -881,8 +881,7 @@ no_flow_control:
goto bail;
}
qp->s_num_rd_atomic++;
-
- /* FALLTHROUGH */
+ fallthrough;
case IB_WR_OPFN:
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
@@ -946,10 +945,10 @@ no_flow_control:
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -991,10 +990,10 @@ no_flow_control:
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -2901,7 +2900,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
send_middle:
@@ -2941,7 +2940,7 @@ send_middle:
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
- /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
+ fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
@@ -2957,7 +2956,7 @@ send_last_inv:
goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
@@ -3010,7 +3009,7 @@ send_last:
case OP(RDMA_WRITE_ONLY):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index c93ea021cf49..04575c9afd61 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -2584,7 +2584,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
* 7220, e.g.
*/
ss->go_s99_running = 1;
- /* fall through -- and start dma engine */
+ fallthrough; /* and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
@@ -2726,7 +2726,6 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e70_go_idle:
break;
case sdma_event_e85_link_down:
- /* fall through */
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3007,7 +3006,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e60_hw_halted:
need_progress = 1;
sdma_err_progress_check_schedule(sde);
- /* fall through */
+ fallthrough;
case sdma_event_e90_sw_halted:
/*
* SW initiated halt does not perform engines
@@ -3021,7 +3020,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
break;
case sdma_event_e85_link_down:
ss->go_s99_running = 0;
- /* fall through */
+ fallthrough;
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3252,7 +3251,7 @@ void _sdma_txreq_ahgadd(
tx->num_desc++;
tx->descs[2].qw[0] = 0;
tx->descs[2].qw[1] = 0;
- /* FALLTHROUGH */
+ fallthrough;
case SDMA_AHG_APPLY_UPDATE2:
tx->num_desc++;
tx->descs[1].qw[0] = 0;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 243b4ba0b6f6..9af82ff933d7 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -3227,7 +3227,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
case IB_WR_RDMA_READ:
if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
- /* fall through */
+ fallthrough;
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
@@ -5067,7 +5067,7 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (priv->s_state == TID_OP(WRITE_REQ))
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
priv->s_state = TID_OP(WRITE_DATA);
- /* fall through */
+ fallthrough;
case TID_OP(WRITE_DATA):
/*
@@ -5406,7 +5406,10 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if ((dd->flags & HFI1_SHUTDOWN))
+ return true;
return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 0c77f18120ed..1fb918399da0 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -216,7 +216,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -241,7 +241,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -414,7 +414,7 @@ send_first:
goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
/*
@@ -515,7 +515,7 @@ rdma_first:
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index bfa6e081cb56..d2d526c5a756 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -91,7 +91,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
tx->mr = NULL;
tx->sde = priv->s_sde;
tx->psc = priv->s_sendcontext;
- /* so that we can test if the sdma decriptors are there */
+ /* so that we can test if the sdma descriptors are there */
tx->txreq.num_desc = 0;
/* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index a77fa6730b2d..da9888deff8c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,9 +37,8 @@
#define DRV_NAME "hns_roce"
-/* hip08 is a pci device, it includes two version according pci version id */
-#define PCI_REVISION_ID_HIP08_A 0x20
-#define PCI_REVISION_ID_HIP08_B 0x21
+/* hip08 is a pci device */
+#define PCI_REVISION_ID_HIP08 0x21
#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
@@ -348,20 +347,22 @@ struct hns_roce_buf_attr {
bool mtt_only; /* only alloc buffer-required MTT memory */
};
+struct hns_roce_hem_cfg {
+ dma_addr_t root_ba; /* root BA table's address */
+ bool is_direct; /* addressing without BA table */
+ unsigned int ba_pg_shift; /* BA table page shift */
+ unsigned int buf_pg_shift; /* buffer page shift */
+ unsigned int buf_pg_count; /* buffer page count */
+ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
+ int region_count;
+};
+
/* memory translate region */
struct hns_roce_mtr {
struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */
struct ib_umem *umem; /* user space buffer */
struct hns_roce_buf *kmem; /* kernel space buffer */
- struct {
- dma_addr_t root_ba; /* root BA table's address */
- bool is_direct; /* addressing without BA table */
- unsigned int ba_pg_shift; /* BA table page shift */
- unsigned int buf_pg_shift; /* buffer page shift */
- int buf_pg_count; /* buffer page count */
- struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
- unsigned int region_count;
- } hem_cfg; /* config for hardware addressing */
+ struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */
};
struct hns_roce_mw {
@@ -898,13 +899,14 @@ struct hns_roce_hw {
int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
enum ib_mtu mtu);
- int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr,
- unsigned long mtpt_idx);
+ int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
+ struct hns_roce_mr *mr, unsigned long mtpt_idx);
int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int flags, u32 pdn,
int mr_access_flags, u64 iova, u64 size,
void *mb_buf);
- int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
+ int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
+ struct hns_roce_mr *mr);
int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
@@ -1191,7 +1193,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
@@ -1266,6 +1268,6 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
-int hns_roce_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
+ struct ib_cq *ib_cq);
#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index d02207cd30df..07b4c85d341d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1756,10 +1756,10 @@ static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
val);
}
-static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
+static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf,
+ struct hns_roce_mr *mr,
unsigned long mtpt_idx)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 };
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_v1_mpt_entry *mpt_entry;
@@ -2483,7 +2483,6 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- int rq_pa_start;
int count;
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba);
@@ -2491,9 +2490,9 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev, "Failed to find SQ ba\n");
return -ENOBUFS;
}
- rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift;
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1,
- NULL);
+
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba,
+ 1, NULL);
if (!count) {
ibdev_err(ibdev, "Failed to find RQ ba\n");
return -ENOBUFS;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c597d7281629..d2968594664b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -91,10 +91,11 @@ static u32 to_hr_opcode(u32 ib_opcode)
}
static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
- void *wqe, const struct ib_reg_wr *wr)
+ const struct ib_reg_wr *wr)
{
+ struct hns_roce_wqe_frmr_seg *fseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
- struct hns_roce_wqe_frmr_seg *fseg = wqe;
u64 pbl_ba;
/* use ib_access_flags */
@@ -128,14 +129,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
-static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe,
+static void set_atomic_seg(const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
unsigned int valid_num_sge)
{
- struct hns_roce_wqe_atomic_seg *aseg;
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_wqe_atomic_seg *aseg =
+ (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg);
- set_data_seg_v2(wqe, wr->sg_list);
- aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg);
+ set_data_seg_v2(dseg, wr->sg_list);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
@@ -143,7 +146,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe,
} else {
aseg->fetchadd_swap_data =
cpu_to_le64(atomic_wr(wr)->compare_add);
- aseg->cmp_data = 0;
+ aseg->cmp_data = 0;
}
roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
@@ -176,13 +179,15 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
- void *wqe, unsigned int *sge_ind,
+ unsigned int *sge_ind,
unsigned int valid_num_sge)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ void *wqe = dseg;
int j = 0;
int i;
@@ -438,7 +443,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
- wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
switch (wr->opcode) {
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
@@ -451,7 +455,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
break;
case IB_WR_REG_MR:
- set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -468,10 +472,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge);
+ set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
else if (wr->opcode != IB_WR_REG_MR)
ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
- wqe, &curr_idx, valid_num_sge);
+ &curr_idx, valid_num_sge);
*sge_idx = curr_idx;
@@ -910,7 +914,7 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev)
instance_stage = handle->rinfo.instance_state;
reset_stage = handle->rinfo.reset_state;
reset_cnt = ops->ae_dev_reset_cnt(handle);
- hw_resetting = ops->get_hw_reset_stat(handle);
+ hw_resetting = ops->get_cmdq_stat(handle);
sw_resetting = ops->ae_dev_resetting(handle);
if (reset_cnt != hr_dev->reset_cnt)
@@ -1510,8 +1514,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
req_a = (struct hns_roce_vf_res_a *)desc[0].data;
req_b = (struct hns_roce_vf_res_b *)desc[1].data;
- memset(req_a, 0, sizeof(*req_a));
- memset(req_b, 0, sizeof(*req_b));
for (i = 0; i < 2; i++) {
hns_roce_cmq_setup_basic_desc(&desc[i],
HNS_ROCE_OPC_ALLOC_VF_RES, false);
@@ -1744,27 +1746,25 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
- caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
- HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
- HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
+ HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
+ HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
- caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
- caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
- caps->qpc_timer_ba_pg_sz = 0;
- caps->qpc_timer_buf_pg_sz = 0;
- caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
- caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
- caps->cqc_timer_ba_pg_sz = 0;
- caps->cqc_timer_buf_pg_sz = 0;
- caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
+ caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
+ caps->qpc_timer_ba_pg_sz = 0;
+ caps->qpc_timer_buf_pg_sz = 0;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
+ caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
+ caps->cqc_timer_ba_pg_sz = 0;
+ caps->cqc_timer_buf_pg_sz = 0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
- caps->sccc_ba_pg_sz = 0;
- caps->sccc_buf_pg_sz = 0;
- caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
- }
+ caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
+ caps->sccc_ba_pg_sz = 0;
+ caps->sccc_buf_pg_sz = 0;
+ caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
}
static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
@@ -1995,20 +1995,18 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->srqc_bt_num, &caps->srqc_buf_pg_sz,
&caps->srqc_ba_pg_sz, HEM_TYPE_SRQC);
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
- caps->sccc_hop_num = ctx_hop_num;
- caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->sccc_hop_num = ctx_hop_num;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
- caps->sccc_hop_num, caps->sccc_bt_num,
- &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
- HEM_TYPE_SCCC);
- calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz,
- caps->cqc_timer_hop_num, caps->cqc_timer_bt_num,
- &caps->cqc_timer_buf_pg_sz,
- &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER);
- }
+ calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
+ caps->sccc_hop_num, caps->sccc_bt_num,
+ &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
+ HEM_TYPE_SCCC);
+ calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz,
+ caps->cqc_timer_hop_num, caps->cqc_timer_bt_num,
+ &caps->cqc_timer_buf_pg_sz,
+ &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER);
calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num,
1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
@@ -2055,22 +2053,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
- ret = hns_roce_query_pf_timer_resource(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev,
- "Query pf timer resource fail, ret = %d.\n",
- ret);
- return ret;
- }
+ ret = hns_roce_query_pf_timer_resource(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to query pf timer resource, ret = %d.\n", ret);
+ return ret;
+ }
- ret = hns_roce_set_vf_switch_param(hr_dev, 0);
- if (ret) {
- dev_err(hr_dev->dev,
- "Set function switch param fail, ret = %d.\n",
- ret);
- return ret;
- }
+ ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to set function switch param, ret = %d.\n",
+ ret);
+ return ret;
}
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
@@ -2336,8 +2331,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B)
- hns_roce_function_clear(hr_dev);
+ hns_roce_function_clear(hr_dev);
hns_roce_free_link_table(hr_dev, &priv->tpq);
hns_roce_free_link_table(hr_dev, &priv->tsq);
@@ -2529,10 +2523,10 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
+static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_mpt_entry *mpt_entry,
struct hns_roce_mr *mr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
struct ib_device *ibdev = &hr_dev->ib_dev;
dma_addr_t pbl_ba;
@@ -2571,7 +2565,8 @@ static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
return 0;
}
-static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
+static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
+ void *mb_buf, struct hns_roce_mr *mr,
unsigned long mtpt_idx)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
@@ -2620,7 +2615,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
if (mr->type == MR_TYPE_DMA)
return 0;
- ret = set_mtpt_pbl(mpt_entry, mr);
+ ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
return ret;
}
@@ -2666,15 +2661,15 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
mr->iova = iova;
mr->size = size;
- ret = set_mtpt_pbl(mpt_entry, mr);
+ ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
return ret;
}
-static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
+static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
+ void *mb_buf, struct hns_roce_mr *mr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_v2_mpt_entry *mpt_entry;
dma_addr_t pbl_ba = 0;
@@ -3052,6 +3047,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
IB_WC_RETRY_EXC_ERR },
{ HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR },
{ HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR },
+ { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR}
};
u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
@@ -3074,6 +3070,14 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
sizeof(*cqe), false);
/*
+ * For hns ROCEE, GENERAL_ERR is an error type that is not defined in
+ * the standard protocol, the driver must ignore it and needn't to set
+ * the QP to an error state.
+ */
+ if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR)
+ return;
+
+ /*
* Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets
* into errored mode. Hence, as a workaround to this hardware
* limitation, driver needs to assist in flushing. But the flushing
@@ -3169,51 +3173,51 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
/* SQ corresponding to CQE */
switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M,
V2_CQE_BYTE_4_OPCODE_S) & 0x1f) {
- case HNS_ROCE_SQ_OPCODE_SEND:
+ case HNS_ROCE_V2_WQE_OP_SEND:
wc->opcode = IB_WC_SEND;
break;
- case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV:
+ case HNS_ROCE_V2_WQE_OP_SEND_WITH_INV:
wc->opcode = IB_WC_SEND;
break;
- case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM:
+ case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM:
wc->opcode = IB_WC_SEND;
wc->wc_flags |= IB_WC_WITH_IMM;
break;
- case HNS_ROCE_SQ_OPCODE_RDMA_READ:
+ case HNS_ROCE_V2_WQE_OP_RDMA_READ:
wc->opcode = IB_WC_RDMA_READ;
wc->byte_len = le32_to_cpu(cqe->byte_cnt);
break;
- case HNS_ROCE_SQ_OPCODE_RDMA_WRITE:
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
- case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM:
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
wc->opcode = IB_WC_RDMA_WRITE;
wc->wc_flags |= IB_WC_WITH_IMM;
break;
- case HNS_ROCE_SQ_OPCODE_LOCAL_INV:
+ case HNS_ROCE_V2_WQE_OP_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
wc->opcode = IB_WC_COMP_SWAP;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
wc->opcode = IB_WC_FETCH_ADD;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
wc->opcode = IB_WC_MASKED_COMP_SWAP;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD:
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
+ case HNS_ROCE_V2_WQE_OP_FAST_REG_PMR:
wc->opcode = IB_WC_REG_MR;
break;
- case HNS_ROCE_SQ_OPCODE_BIND_MW:
+ case HNS_ROCE_V2_WQE_OP_BIND_MW:
wc->opcode = IB_WC_REG_MR;
break;
default:
@@ -3373,11 +3377,33 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
return op + step_idx;
}
+static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba,
+ u32 hem_type, int step_idx)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+ int op;
+
+ op = get_op_for_set_hem(hr_dev, hem_type, step_idx);
+ if (op < 0)
+ return 0;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj,
+ 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
int step_idx)
{
- struct hns_roce_cmd_mailbox *mailbox;
struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
@@ -3389,7 +3415,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
u64 bt_ba = 0;
u32 chunk_ba_num;
u32 hop_num;
- int op;
if (!hns_roce_check_whether_mhop(hr_dev, table->type))
return 0;
@@ -3411,14 +3436,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
hem_idx = i;
}
- op = get_op_for_set_hem(hr_dev, table->type, step_idx);
- if (op == -EINVAL)
- return 0;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
if (table->type == HEM_TYPE_SCCC)
obj = mhop.l0_idx;
@@ -3427,11 +3444,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
for (hns_roce_hem_first(hem, &iter);
!hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
bt_ba = hns_roce_hem_addr(&iter);
-
- /* configure the ba, tag, and op */
- ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma,
- obj, 0, op,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type,
+ step_idx);
}
} else {
if (step_idx == 0)
@@ -3439,12 +3453,9 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
else if (step_idx == 1 && hop_num == 2)
bt_ba = table->bt_l1_dma_addr[l1_idx];
- /* configure the ba, tag, and op */
- ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj,
- 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx);
}
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
}
@@ -3744,51 +3755,23 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
}
}
-static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp, int mtt_cnt,
- u32 page_size)
-{
- struct ib_device *ibdev = &hr_dev->ib_dev;
-
- if (hr_qp->rq.wqe_cnt < 1)
- return true;
-
- if (mtt_cnt < 1) {
- ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n",
- hr_qp->qpn);
- return false;
- }
-
- if (mtt_cnt < MTT_MIN_COUNT &&
- (hr_qp->rq.offset + page_size) < hr_qp->buff_size) {
- ibdev_err(ibdev,
- "failed to find next RQWQE buf ba of QP(0x%lx)\n",
- hr_qp->qpn);
- return false;
- }
-
- return true;
-}
-
static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
- struct ib_qp *ibqp = &hr_qp->ibqp;
u64 mtts[MTT_MIN_COUNT] = { 0 };
u64 wqe_sge_ba;
- u32 page_size;
int count;
/* Search qp buf's mtts */
- page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
- hr_qp->rq.offset / page_size, mtts,
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
MTT_MIN_COUNT, &wqe_sge_ba);
- if (!ibqp->srq)
- if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size))
- return -EINVAL;
+ if (hr_qp->rq.wqe_cnt && count < 1) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn);
+ return -EINVAL;
+ }
context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
qpc_mask->wqe_sge_ba = 0;
@@ -3890,7 +3873,6 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 sge_cur_blk = 0;
u64 sq_cur_blk = 0;
- u32 page_size;
int count;
/* search qp buf's mtts */
@@ -3901,9 +3883,8 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
if (hr_qp->sge.sge_cnt > 0) {
- page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
- hr_qp->sge.offset / page_size,
+ hr_qp->sge.offset,
&sge_cur_blk, 1, NULL);
if (count < 1) {
ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n",
@@ -3953,6 +3934,15 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
return 0;
}
+static inline enum ib_mtu get_mtu(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr)
+{
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
+ return IB_MTU_4096;
+
+ return attr->path_mtu;
+}
+
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
struct hns_roce_v2_qp_context *context,
@@ -3964,6 +3954,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
struct ib_device *ibdev = &hr_dev->ib_dev;
dma_addr_t trrl_ba;
dma_addr_t irrl_ba;
+ enum ib_mtu mtu;
u8 port_num;
u64 *mtts;
u8 *dmac;
@@ -4061,23 +4052,23 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
V2_QPC_BYTE_52_DMAC_S, 0);
- /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */
+ mtu = get_mtu(ibqp, attr);
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
+ V2_QPC_BYTE_24_MTU_S, mtu);
+ roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
+ V2_QPC_BYTE_24_MTU_S, 0);
+ }
+
+#define MAX_LP_MSG_LEN 65536
+ /* MTU*(2^LP_PKTN_INI) shouldn't be bigger than 64kb */
roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
V2_QPC_BYTE_56_LP_PKTN_INI_S,
- ilog2(hr_dev->caps.max_sq_inline / IB_MTU_4096));
+ ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu)));
roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
- else if (attr_mask & IB_QP_PATH_MTU)
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S, attr->path_mtu);
-
- roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S, 0);
-
roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0);
roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M,
@@ -4254,12 +4245,13 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp)
+ if (is_udp)
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2);
else
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0);
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -4290,7 +4282,9 @@ static bool check_qp_state(enum ib_qp_state cur_state,
[IB_QPS_RTR] = { [IB_QPS_RESET] = true,
[IB_QPS_RTS] = true,
[IB_QPS_ERR] = true },
- [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true },
+ [IB_QPS_RTS] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
[IB_QPS_SQD] = {},
[IB_QPS_SQE] = {},
[IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index e176b0aaa4ac..1fb1c583d0f8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -179,27 +179,11 @@ enum {
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb,
- HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc,
+ HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
};
enum {
- HNS_ROCE_SQ_OPCODE_SEND = 0x0,
- HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1,
- HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2,
- HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3,
- HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4,
- HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5,
- HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6,
- HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7,
- HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
- HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
- HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa,
- HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb,
- HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc,
-};
-
-enum {
/* rq operations */
HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0,
HNS_ROCE_V2_OPCODE_SEND = 0x1,
@@ -230,6 +214,7 @@ enum {
HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15,
HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16,
HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22,
+ HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23,
HNS_ROCE_V2_CQE_STATUS_MASK = 0xff,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 50763cf4fa3d..5907cfd878a6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -428,7 +428,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.destroy_ah = hns_roce_destroy_ah,
.destroy_cq = hns_roce_destroy_cq,
.disassociate_ucontext = hns_roce_disassociate_ucontext,
- .fill_res_entry = hns_roce_fill_res_entry,
+ .fill_res_cq_entry = hns_roce_fill_res_cq_entry,
.get_dma_mr = hns_roce_get_dma_mr,
.get_link_layer = hns_roce_get_link_layer,
.get_port_immutable = hns_roce_port_immutable,
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 4c0bbb12770d..e5df3884b41d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -120,7 +120,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
buf_attr.page_shift = is_fast ? PAGE_SHIFT :
- hr_dev->caps.pbl_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+ hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
buf_attr.region[0].size = length;
buf_attr.region[0].hopnum = mr->pbl_hop_num;
buf_attr.region_count = 1;
@@ -180,9 +180,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
}
if (mr->type != MR_TYPE_FRMR)
- ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+ ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr,
+ mtpt_idx);
else
- ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
+ ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
if (ret) {
dev_err(dev, "Write mtpt fail!\n");
goto err_page;
@@ -414,7 +415,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
@@ -870,6 +871,15 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int err;
int i;
+ /*
+ * Only use the first page address as root ba when hopnum is 0, this
+ * is because the addresses of all pages are consecutive in this case.
+ */
+ if (mtr->hem_cfg.is_direct) {
+ mtr->hem_cfg.root_ba = pages[0];
+ return 0;
+ }
+
for (i = 0; i < mtr->hem_cfg.region_count; i++) {
r = &mtr->hem_cfg.region[i];
if (r->offset + r->count > page_cnt) {
@@ -895,6 +905,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int start_index;
int mtt_count;
int total = 0;
__le64 *mtts;
@@ -906,26 +918,32 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
goto done;
/* no mtt memory in direct mode, so just return the buffer address */
- if (mtr->hem_cfg.is_direct) {
- npage = offset;
- for (total = 0; total < mtt_max; total++, npage++) {
- addr = mtr->hem_cfg.root_ba +
- (npage << mtr->hem_cfg.buf_pg_shift);
-
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ for (mtt_count = 0; mtt_count < cfg->region_count &&
+ total < mtt_max; mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
+
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
mtt_buf[total] = to_hr_hw_page_addr(addr);
else
mtt_buf[total] = addr;
+
+ total++;
}
goto done;
}
+ start_index = offset >> cfg->buf_pg_shift;
left = mtt_max;
while (left > 0) {
mtt_count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
- offset + total,
+ start_index + total,
&mtt_count, NULL);
if (!mtts || !mtt_count)
goto done;
@@ -938,104 +956,136 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
done:
if (base_addr)
- *base_addr = mtr->hem_cfg.root_ba;
+ *base_addr = cfg->root_ba;
return total;
}
-/* convert buffer size to page index and page count */
-static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr,
- int page_cnt,
- struct hns_roce_buf_region *regions,
- int region_cnt, unsigned int page_shift)
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr,
+ struct hns_roce_hem_cfg *cfg,
+ unsigned int *buf_page_shift)
{
- unsigned int page_size = 1 << page_shift;
- int max_region = attr->region_count;
struct hns_roce_buf_region *r;
- unsigned int i = 0;
- int page_idx = 0;
-
- for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) {
- r = &regions[i];
- r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ?
- 0 : attr->region[i].hopnum;
- r->offset = page_idx;
- r->count = DIV_ROUND_UP(attr->region[i].size, page_size);
- page_idx += r->count;
+ unsigned int page_shift = 0;
+ int page_cnt = 0;
+ size_t buf_size;
+ int region_cnt;
+
+ if (cfg->is_direct) {
+ buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
+ page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
+ /*
+ * When HEM buffer use level-0 addressing, the page size equals
+ * the buffer size, and the the page size = 4K * 2^N.
+ */
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
+ if (attr->region_count > 1) {
+ cfg->buf_pg_count = page_cnt;
+ page_shift = HNS_HW_PAGE_SHIFT;
+ } else {
+ cfg->buf_pg_count = 1;
+ page_shift = cfg->buf_pg_shift;
+ if (buf_size != 1 << page_shift) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to check direct size %zu shift %d.\n",
+ buf_size, page_shift);
+ return -EINVAL;
+ }
+ }
+ } else {
+ page_shift = cfg->buf_pg_shift;
+ }
+
+ /* convert buffer size to page index and page count */
+ for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
+ region_cnt < attr->region_count &&
+ region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
+ r = &cfg->region[region_cnt];
+ r->offset = page_cnt;
+ buf_size = hr_hw_page_align(attr->region[region_cnt].size);
+ r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
+ page_cnt += r->count;
+ r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
+ r->count);
+ }
+
+ if (region_cnt < 1) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to check mtr region count, pages = %d.\n",
+ cfg->buf_pg_count);
+ return -ENOBUFS;
}
- return i;
+ cfg->region_count = region_cnt;
+ *buf_page_shift = page_shift;
+
+ return page_cnt;
}
/**
* hns_roce_mtr_create - Create hns memory translate region.
*
* @mtr: memory translate region
- * @init_attr: init attribute for creating mtr
- * @page_shift: page shift for multi-hop base address table
+ * @buf_attr: buffer attribute for creating mtr
+ * @ba_page_shift: page shift for multi-hop base address table
* @udata: user space context, if it's NULL, means kernel space
* @user_addr: userspace virtual address to start at
- * @buf_alloced: mtr has private buffer, true means need to alloc
*/
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct hns_roce_buf_attr *buf_attr,
- unsigned int page_shift, struct ib_udata *udata,
+ unsigned int ba_page_shift, struct ib_udata *udata,
unsigned long user_addr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int buf_page_shift = 0;
dma_addr_t *pages = NULL;
- int region_cnt = 0;
int all_pg_cnt;
int get_pg_cnt;
- bool has_mtt;
- int err = 0;
+ int ret = 0;
+
+ /* if disable mtt, all pages must in a continuous address range */
+ cfg->is_direct = !mtr_has_mtt(buf_attr);
- has_mtt = mtr_has_mtt(buf_attr);
/* if buffer only need mtt, just init the hem cfg */
if (buf_attr->mtt_only) {
- mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift;
- mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >>
- buf_attr->page_shift;
+ cfg->buf_pg_shift = buf_attr->page_shift;
+ cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
+ buf_attr->page_shift;
mtr->umem = NULL;
mtr->kmem = NULL;
} else {
- err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata,
- user_addr);
- if (err) {
- ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n",
- err);
- return err;
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
+ udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
}
}
- /* alloc mtt memory */
- all_pg_cnt = mtr->hem_cfg.buf_pg_count;
- hns_roce_hem_list_init(&mtr->hem_list);
- mtr->hem_cfg.is_direct = !has_mtt;
- mtr->hem_cfg.ba_pg_shift = page_shift;
- mtr->hem_cfg.region_count = 0;
- region_cnt = mtr_init_region(buf_attr, all_pg_cnt,
- mtr->hem_cfg.region,
- ARRAY_SIZE(mtr->hem_cfg.region),
- mtr->hem_cfg.buf_pg_shift);
- if (region_cnt < 1) {
- err = -ENOBUFS;
- ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt);
+ all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
+ if (all_pg_cnt < 1) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
goto err_alloc_bufs;
}
- mtr->hem_cfg.region_count = region_cnt;
-
- if (has_mtt) {
- err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
- mtr->hem_cfg.region, region_cnt,
- page_shift);
- if (err) {
- ibdev_err(ibdev, "Failed to request mtr hem, err %d\n",
- err);
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
+ ret);
goto err_alloc_bufs;
}
- mtr->hem_cfg.root_ba = mtr->hem_list.root_ba;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
}
/* no buffer to map */
@@ -1045,31 +1095,26 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
/* alloc a tmp array to store buffer's dma address */
pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
if (!pages) {
- err = -ENOMEM;
- ibdev_err(ibdev, "Failed to alloc mtr page list %d\n",
+ ret = -ENOMEM;
+ ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
all_pg_cnt);
goto err_alloc_hem_list;
}
get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
- mtr->hem_cfg.buf_pg_shift);
+ buf_page_shift);
if (get_pg_cnt != all_pg_cnt) {
- ibdev_err(ibdev, "Failed to get mtr page %d != %d\n",
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
get_pg_cnt, all_pg_cnt);
- err = -ENOBUFS;
+ ret = -ENOBUFS;
goto err_alloc_page_list;
}
- if (!has_mtt) {
- mtr->hem_cfg.root_ba = pages[0];
- } else {
- /* write buffer's dma address to BA table */
- err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
- if (err) {
- ibdev_err(ibdev, "Failed to map mtr pages, err %d\n",
- err);
- goto err_alloc_page_list;
- }
+ /* write buffer's dma address to BA table */
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
+ goto err_alloc_page_list;
}
/* drop tmp array */
@@ -1081,7 +1126,7 @@ err_alloc_hem_list:
hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
err_alloc_bufs:
mtr_free_bufs(hr_dev, mtr);
- return err;
+ return ret;
}
void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index a0a47bd66975..e94ca130ff5e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -411,7 +411,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
struct hns_roce_qp *hr_qp,
struct ib_qp_cap *cap)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
u32 cnt;
cnt = max(1U, cap->max_send_sge);
@@ -431,15 +430,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
} else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) {
cnt = roundup_pow_of_two(sq_wqe_cnt *
(hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE));
-
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
- if (cnt > hr_dev->caps.max_extend_sg) {
- ibdev_err(ibdev,
- "failed to check exSGE num, exSGE num = %d.\n",
- cnt);
- return -EINVAL;
- }
- }
} else {
cnt = 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 06871731ac43..259444c0a630 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -76,10 +76,9 @@ err:
return -EMSGSIZE;
}
-static int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
+ struct ib_cq *ib_cq)
{
- struct ib_cq *ib_cq = container_of(res, struct ib_cq, res);
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
struct hns_roce_v2_cq_context *context;
@@ -119,12 +118,3 @@ err:
kfree(context);
return ret;
}
-
-int hns_roce_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (res->type == RDMA_RESTRACK_CQ)
- return hns_roce_fill_res_cq_entry(msg, res);
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
index 8942f8229945..34da9eba8a7c 100644
--- a/drivers/infiniband/hw/i40iw/Makefile
+++ b/drivers/infiniband/hw/i40iw/Makefile
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -I $(srctree)/drivers/net/ethernet/intel/i40e
obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 49d92638e0db..25747b85a79c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -45,6 +45,7 @@
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/crc32c.h>
+#include <linux/net/intel/i40e_client.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
@@ -57,7 +58,6 @@
#include "i40iw_d.h"
#include "i40iw_hmc.h"
-#include <i40e_client.h>
#include "i40iw_type.h"
#include "i40iw_p.h"
#include <rdma/i40iw-abi.h>
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 19af29a48c55..6957e4f3404b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -101,7 +101,6 @@ static int i40iw_query_port(struct ib_device *ibdev,
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
props->active_width = IB_WIDTH_4X;
props->active_speed = 1;
props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
@@ -1543,10 +1542,9 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr
* @pd: ibpd pointer
* @mr_type: memory for stag registrion
* @max_num_sg: man number of pages
- * @udata: user data or NULL for kernel objects
*/
static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct i40iw_pd *iwpd = to_iwpd(pd);
struct i40iw_device *iwdev = to_iwdev(pd->device);
@@ -2460,7 +2458,6 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
@@ -2616,22 +2613,6 @@ static int i40iw_query_gid(struct ib_device *ibdev,
return 0;
}
-/**
- * i40iw_query_pkey - Query partition key
- * @ibdev: device pointer from stack
- * @port: port number
- * @index: index of pkey
- * @pkey: pointer to store the pkey
- */
-static int i40iw_query_pkey(struct ib_device *ibdev,
- u8 port,
- u16 index,
- u16 *pkey)
-{
- *pkey = 0;
- return 0;
-}
-
static const struct ib_device_ops i40iw_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_I40IW,
@@ -2671,7 +2652,6 @@ static const struct ib_device_ops i40iw_dev_ops = {
.post_send = i40iw_post_send,
.query_device = i40iw_query_device,
.query_gid = i40iw_query_gid,
- .query_pkey = i40iw_query_pkey,
.query_port = i40iw_query_port,
.query_qp = i40iw_query_qp,
.reg_user_mr = i40iw_reg_user_mr,
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 816d28854a8e..5e7910a517da 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1219,56 +1219,47 @@ static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
}
-static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx4_ib_xrcd *xrcd;
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
struct ib_cq_init_attr cq_attr = {};
int err;
- if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -EOPNOTSUPP;
- err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+ err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn);
if (err)
- goto err1;
+ return err;
- xrcd->pd = ib_alloc_pd(ibdev, 0);
+ xrcd->pd = ib_alloc_pd(ibxrcd->device, 0);
if (IS_ERR(xrcd->pd)) {
err = PTR_ERR(xrcd->pd);
goto err2;
}
cq_attr.cqe = 1;
- xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
+ xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr);
if (IS_ERR(xrcd->cq)) {
err = PTR_ERR(xrcd->cq);
goto err3;
}
- return &xrcd->ibxrcd;
+ return 0;
err3:
ib_dealloc_pd(xrcd->pd);
err2:
- mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
-err1:
- kfree(xrcd);
- return ERR_PTR(err);
+ mlx4_xrcd_free(dev->dev, xrcd->xrcdn);
+ return err;
}
-static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
- kfree(xrcd);
-
- return 0;
}
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
@@ -2607,6 +2598,8 @@ static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
.alloc_xrcd = mlx4_ib_alloc_xrcd,
.dealloc_xrcd = mlx4_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd),
};
static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6f4ea1067095..38e87a700a2a 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -729,7 +729,7 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 7e0b205c05eb..1d5ef0de12c9 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->ibmr.length = length;
- mr->ibmr.iova = virt_addr;
mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
@@ -655,7 +654,7 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
}
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index cf51e3cbd969..f9ca6e000a81 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -3541,11 +3541,11 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
int nreq;
int err = 0;
unsigned ind;
- int uninitialized_var(size);
- unsigned uninitialized_var(seglen);
+ int size;
+ unsigned seglen;
__be32 dummy;
__be32 *lso_wqe;
- __be32 uninitialized_var(lso_hdr_sz);
+ __be32 lso_hdr_sz;
__be32 blh;
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 8cca61c671f8..b4c009bb0db6 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := ah.o \
cmd.o \
cong.o \
+ counters.o \
cq.o \
doorbell.o \
gsi.o \
@@ -22,5 +23,6 @@ mlx5_ib-y := ah.o \
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
- flow.o \
- qos.o
+ fs.o \
+ qos.o \
+ std_types.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index cc24c711e92a..ebb2f108b64f 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -148,18 +148,6 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
spin_unlock(&dm->lock);
}
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
-{
- u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
-
- MLX5_SET(ppcnt_reg, in, local_port, 1);
-
- MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
- return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
- 0, 0);
-}
-
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index f4d8558db434..1d192a8ca87d 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -41,7 +41,6 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out);
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
new file mode 100644
index 000000000000..145f3cb40ccb
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include "mlx5_ib.h"
+#include <linux/mlx5/eswitch.h>
+#include "counters.h"
+#include "ib_rep.h"
+#include "qp.h"
+
+struct mlx5_ib_counter {
+ const char *name;
+ size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
+static const struct mlx5_ib_counter roce_accl_cnts[] = {
+ INIT_Q_COUNTER(roce_adp_retrans),
+ INIT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_Q_COUNTER(roce_slow_restart),
+ INIT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_Q_COUNTER(roce_slow_restart_trans),
+};
+
+#define INIT_EXT_PPCNT_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
+
+static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
+ INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
+};
+
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+ struct ib_counters_read_attr *read_attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ struct mlx5_read_counters_attr mread_attr = {};
+ struct mlx5_ib_flow_counters_desc *desc;
+ int ret, i;
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ if (mcounters->cntrs_max_index > read_attr->ncounters) {
+ ret = -EINVAL;
+ goto err_bound;
+ }
+
+ mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+ GFP_KERNEL);
+ if (!mread_attr.out) {
+ ret = -ENOMEM;
+ goto err_bound;
+ }
+
+ mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+ mread_attr.flags = read_attr->flags;
+ ret = mcounters->read_counters(counters->device, &mread_attr);
+ if (ret)
+ goto err_read;
+
+ /* do the pass over the counters data array to assign according to the
+ * descriptions and indexing pairs
+ */
+ desc = mcounters->counters_data;
+ for (i = 0; i < mcounters->ncounters; i++)
+ read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+ kfree(mread_attr.out);
+err_bound:
+ mutex_unlock(&mcounters->mcntrs_mutex);
+ return ret;
+}
+
+static void mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mlx5_ib_counters_clear_description(counters);
+ if (mcounters->hw_cntrs_hndl)
+ mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+}
+
+static int mlx5_ib_create_counters(struct ib_counters *counters,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mutex_init(&mcounters->mcntrs_mutex);
+ return 0;
+}
+
+
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+ return MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+ MLX5_ESWITCH_OFFLOADS;
+}
+
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+ &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero based port number
+ *
+ * mlx5_ib_get_counters_id() Returns counters set id to use for given
+ * device port combination in switchdev and non switchdev mode of the
+ * parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return cnts->set_id;
+}
+
+static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
+
+ if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
+ return NULL;
+
+ cnts = get_counters(dev, port_num - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats,
+ u16 set_id)
+{
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ __be32 val;
+ int ret, i;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)((void *)out + cnts->offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
+{
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int ret, i;
+ void *out;
+
+ out = kvzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
+ 0, 0);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ cnts->offsets[i + offset]));
+free:
+ kvfree(out);
+ return ret;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
+ struct mlx5_core_dev *mdev;
+ int ret, num_counters;
+ u8 mdev_port_num;
+
+ if (!stats)
+ return -EINVAL;
+
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ /* q_counters are per IB device, query the master mdev */
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
+ if (ret)
+ return ret;
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
+ &mdev_port_num);
+ if (!mdev) {
+ /* If port is not affiliated yet, its in down state
+ * which doesn't have any counters yet, so it would be
+ * zero. So no need to read from the HCA.
+ */
+ goto done;
+ }
+ ret = mlx5_lag_query_cong_counters(dev->mdev,
+ stats->value +
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ if (ret)
+ return ret;
+ }
+
+done:
+ return num_counters;
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+
+ return mlx5_ib_query_q_counters(dev->mdev, cnts,
+ counter->stats, counter->id);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+ if (!counter->id)
+ return 0;
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ int err;
+
+ if (!counter->id) {
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+
+ MLX5_SET(alloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err)
+ return err;
+ counter->id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5_ib_counter_dealloc(counter);
+ counter->id = 0;
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+{
+ return mlx5_ib_qp_set_counter(qp, NULL);
+}
+
+
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
+{
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ names[j] = basic_q_cnts[i].name;
+ offsets[j] = basic_q_cnts[i].offset;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ names[j] = out_of_seq_q_cnts[i].name;
+ offsets[j] = out_of_seq_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ names[j] = retrans_q_cnts[i].name;
+ offsets[j] = retrans_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ names[j] = extended_err_cnts[i].name;
+ offsets[j] = extended_err_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
+ for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+ names[j] = roce_accl_cnts[i].name;
+ offsets[j] = roce_accl_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ names[j] = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
+ names[j] = ext_ppcnt_cnts[i].name;
+ offsets[j] = ext_ppcnt_cnts[i].offset;
+ }
+ }
+}
+
+
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(basic_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += ARRAY_SIZE(extended_err_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl))
+ num_counters += ARRAY_SIZE(roce_accl_cnts);
+
+ cnts->num_q_counters = num_counters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
+ num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
+ }
+ cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ if (!cnts->names)
+ return -ENOMEM;
+
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
+ goto err_names;
+
+ return 0;
+
+err_names:
+ kfree(cnts->names);
+ cnts->names = NULL;
+ return -ENOMEM;
+}
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+ int num_cnt_ports;
+ int i;
+
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ if (dev->port[i].cnts.set_id) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ dev->port[i].cnts.set_id);
+ mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+ }
+ kfree(dev->port[i].cnts.names);
+ kfree(dev->port[i].cnts.offsets);
+ }
+}
+
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+ int num_cnt_ports;
+ int err = 0;
+ int i;
+ bool is_shared;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
+ if (err)
+ goto err_alloc;
+
+ mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
+ dev->port[i].cnts.offsets);
+
+ MLX5_SET(alloc_q_counter_in, in, uid,
+ is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err) {
+ mlx5_ib_warn(dev,
+ "couldn't allocate queue counter for port %d, err %d\n",
+ i + 1, err);
+ goto err_alloc;
+ }
+
+ dev->port[i].cnts.set_id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+ return 0;
+
+err_alloc:
+ mlx5_ib_dealloc_counters(dev);
+ return err;
+}
+
+static int read_flow_counters(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr)
+{
+ struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_fc_query(dev->mdev, fc,
+ &read_attr->out[IB_COUNTER_PACKETS],
+ &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(
+ struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
+ struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ u32 cntrs_max_index = 0;
+ int i;
+
+ if (counters_type != MLX5_IB_COUNTERS_FLOW)
+ return -EINVAL;
+
+ /* init the fields for the object */
+ mcounters->type = counters_type;
+ mcounters->read_counters = read_flow_counters;
+ mcounters->counters_num = FLOW_COUNTERS_NUM;
+ mcounters->ncounters = ncounters;
+ /* each counter entry have both description and index pair */
+ for (i = 0; i < ncounters; i++) {
+ if (desc_data[i].description > IB_COUNTER_BYTES)
+ return -EINVAL;
+
+ if (cntrs_max_index <= desc_data[i].index)
+ cntrs_max_index = desc_data[i].index + 1;
+ }
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ mcounters->counters_data = desc_data;
+ mcounters->cntrs_max_index = cntrs_max_index;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+
+ return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+ struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+ struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+ bool hw_hndl = false;
+ int ret = 0;
+
+ if (ucmd && ucmd->ncounters_data != 0) {
+ cntrs_data = ucmd->data;
+ if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+ return -EINVAL;
+
+ desc_data = kcalloc(cntrs_data->ncounters,
+ sizeof(*desc_data),
+ GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ if (copy_from_user(desc_data,
+ u64_to_user_ptr(cntrs_data->counters_data),
+ sizeof(*desc_data) * cntrs_data->ncounters)) {
+ ret = -EFAULT;
+ goto free;
+ }
+ }
+
+ if (!mcounters->hw_cntrs_hndl) {
+ mcounters->hw_cntrs_hndl = mlx5_fc_create(
+ to_mdev(ibcounters->device)->mdev, false);
+ if (IS_ERR(mcounters->hw_cntrs_hndl)) {
+ ret = PTR_ERR(mcounters->hw_cntrs_hndl);
+ goto free;
+ }
+ hw_hndl = true;
+ }
+
+ if (desc_data) {
+ /* counters already bound to at least one flow */
+ if (mcounters->cntrs_max_index) {
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ ret = counters_set_description(ibcounters,
+ MLX5_IB_COUNTERS_FLOW,
+ desc_data,
+ cntrs_data->ncounters);
+ if (ret)
+ goto free_hndl;
+
+ } else if (!mcounters->cntrs_max_index) {
+ /* counters not bound yet, must have udata passed */
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ return 0;
+
+free_hndl:
+ if (hw_hndl) {
+ mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ mcounters->hw_cntrs_hndl = NULL;
+ }
+free:
+ kfree(desc_data);
+ return ret;
+}
+
+void mlx5_ib_counters_clear_description(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters;
+
+ if (!counters || atomic_read(&counters->usecnt) != 1)
+ return;
+
+ mcounters = to_mcounters(counters);
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ kfree(mcounters->counters_data);
+ mcounters->counters_data = NULL;
+ mcounters->cntrs_max_index = 0;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
+static const struct ib_device_ops hw_stats_ops = {
+ .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+};
+
+static const struct ib_device_ops counters_ops = {
+ .create_counters = mlx5_ib_create_counters,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .read_counters = mlx5_ib_read_counters,
+
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
+};
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &counters_ops);
+
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return 0;
+
+ ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
+ return mlx5_ib_alloc_counters(dev);
+}
+
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return;
+
+ mlx5_ib_dealloc_counters(dev);
+}
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
new file mode 100644
index 000000000000..1aa30c2f3f4d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_COUNTERS_H
+#define _MLX5_IB_COUNTERS_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_clear_description(struct ib_counters *counters);
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
+#endif /* _MLX5_IB_COUNTERS_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 0c18cb6a2f14..0133ebb8d740 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -925,8 +925,8 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_cq *cq = to_mcq(ibcq);
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
- int uninitialized_var(index);
- int uninitialized_var(inlen);
+ int index;
+ int inlen;
u32 *cqb = NULL;
void *cqc;
int cqe_size;
@@ -1246,7 +1246,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
__be64 *pas;
int page_shift;
int inlen;
- int uninitialized_var(cqe_size);
+ int cqe_size;
unsigned long flags;
if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9454a66c12cc..9e3d8b826498 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -14,6 +14,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "devx.h"
#include "qp.h"
#include <linux/xarray.h>
@@ -89,22 +90,6 @@ struct devx_async_event_file {
u8 is_destroyed:1;
};
-#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
-struct devx_obj {
- struct mlx5_ib_dev *ib_dev;
- u64 obj_id;
- u32 dinlen; /* destroy inbox length */
- u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
- u32 flags;
- union {
- struct mlx5_ib_devx_mr devx_mr;
- struct mlx5_core_dct core_dct;
- struct mlx5_core_cq core_cq;
- u32 flow_counter_bulk_size;
- };
- struct list_head event_sub; /* holds devx_event_subscription entries */
-};
-
struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
@@ -171,48 +156,6 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
-{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
-
- switch (opcode) {
- case MLX5_CMD_OP_DESTROY_TIR:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
- obj_id);
- return true;
-
- case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
- table_id);
- return true;
- default:
- return false;
- }
-}
-
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
-{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
-
- if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
-
- if (offset && offset >= devx_obj->flow_counter_bulk_size)
- return false;
-
- *counter_id = MLX5_GET(dealloc_flow_counter_in,
- devx_obj->dinbox,
- flow_counter_id);
- *counter_id += offset;
- return true;
- }
-
- return false;
-}
-
static bool is_legacy_unaffiliated_event_num(u16 event_num)
{
switch (event_num) {
@@ -2419,17 +2362,24 @@ static int devx_event_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
{
struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ int uid;
- xa_init(&table->event_xa);
- mutex_init(&table->event_xa_lock);
- MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
- mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ uid = mlx5_ib_devx_create(dev, false);
+ if (uid > 0) {
+ dev->devx_whitelist_uid = uid;
+ xa_init(&table->event_xa);
+ mutex_init(&table->event_xa_lock);
+ MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ }
+
+ return 0;
}
-void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_devx_event_table *table = &dev->devx_event_table;
struct devx_event_subscription *sub, *tmp;
@@ -2437,17 +2387,21 @@ void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
void *entry;
unsigned long id;
- mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
- mutex_lock(&dev->devx_event_table.event_xa_lock);
- xa_for_each(&table->event_xa, id, entry) {
- event = entry;
- list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
- xa_list)
- devx_cleanup_subscription(dev, sub);
- kfree(entry);
+ if (dev->devx_whitelist_uid) {
+ mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(
+ sub, tmp, &event->unaffiliated_list, xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
}
- mutex_unlock(&dev->devx_event_table.event_xa_lock);
- xa_destroy(&table->event_xa);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
@@ -2536,7 +2490,7 @@ static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
{
struct devx_async_event_file *ev_file = filp->private_data;
struct devx_event_subscription *event_sub;
- struct devx_async_event_data *uninitialized_var(event);
+ struct devx_async_event_data *event;
int ret = 0;
size_t eventsz;
bool omit_data;
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
new file mode 100644
index 000000000000..1f69866aed16
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DEVX_H
+#define _MLX5_IB_DEVX_H
+
+#include "mlx5_ib.h"
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+ struct mlx5_ib_dev *ib_dev;
+ u64 obj_id;
+ u32 dinlen; /* destroy inbox length */
+ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ union {
+ struct mlx5_ib_devx_mr devx_mr;
+ struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
+ u32 flow_counter_bulk_size;
+ };
+ struct list_head event_sub; /* holds devx_event_subscription entries */
+};
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+{
+ return -EOPNOTSUPP;
+}
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+}
+#endif
+#endif /* _MLX5_IB_DEVX_H */
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
deleted file mode 100644
index 216a1108ad34..000000000000
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ /dev/null
@@ -1,765 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/*
- * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
- */
-
-#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/uverbs_types.h>
-#include <rdma/uverbs_ioctl.h>
-#include <rdma/uverbs_std_types.h>
-#include <rdma/mlx5_user_ioctl_cmds.h>
-#include <rdma/mlx5_user_ioctl_verbs.h>
-#include <rdma/ib_umem.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/fs.h>
-#include "mlx5_ib.h"
-
-#define UVERBS_MODULE_NAME mlx5_ib
-#include <rdma/uverbs_named_ioctl.h>
-
-static int
-mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
- enum mlx5_flow_namespace_type *namespace)
-{
- switch (table_type) {
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
- *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
- *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
- *namespace = MLX5_FLOW_NAMESPACE_FDB;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
- *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
- *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
- [MLX5_IB_FLOW_TYPE_NORMAL] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- .u.ptr = {
- .len = sizeof(u16), /* data is priority */
- .min_len = sizeof(u16),
- }
- },
- [MLX5_IB_FLOW_TYPE_SNIFFER] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
-};
-
-static int get_dests(struct uverbs_attr_bundle *attrs,
- struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
- int *dest_type, struct ib_qp **qp, u32 *flags)
-{
- bool dest_devx, dest_qp;
- void *devx_obj;
- int err;
-
- dest_devx = uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
- dest_qp = uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
-
- *flags = 0;
- err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
- MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
- MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
- if (err)
- return err;
-
- /* Both flags are not allowed */
- if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
- *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
- return -EINVAL;
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- if (dest_devx && (dest_qp || *flags))
- return -EINVAL;
- else if (dest_qp && *flags)
- return -EINVAL;
- }
-
- /* Allow only DEVX object, drop as dest for FDB */
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
- (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
- return -EINVAL;
-
- /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
- ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
- return -EINVAL;
-
- *qp = NULL;
- if (dest_devx) {
- devx_obj =
- uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
-
- /* Verify that the given DEVX object is a flow
- * steering destination.
- */
- if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type))
- return -EINVAL;
- /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
- *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
- return -EINVAL;
- } else if (dest_qp) {
- struct mlx5_ib_qp *mqp;
-
- *qp = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
- if (IS_ERR(*qp))
- return PTR_ERR(*qp);
-
- if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
-
- mqp = to_mqp(*qp);
- if (mqp->is_rss)
- *dest_id = mqp->rss_qp.tirn;
- else
- *dest_id = mqp->raw_packet_qp.rq.tirn;
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- }
-
- if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- return -EINVAL;
-
- return 0;
-}
-
-#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
-static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_flow_context flow_context = {.flow_tag =
- MLX5_FS_DEFAULT_FLOW_TAG};
- u32 *offset_attr, offset = 0, counter_id = 0;
- int dest_id, dest_type, inlen, len, ret, i;
- struct mlx5_ib_flow_handler *flow_handler;
- struct mlx5_ib_flow_matcher *fs_matcher;
- struct ib_uobject **arr_flow_actions;
- struct ib_uflow_resources *uflow_res;
- struct mlx5_flow_act flow_act = {};
- struct ib_qp *qp = NULL;
- void *devx_obj, *cmd_in;
- struct ib_uobject *uobj;
- struct mlx5_ib_dev *dev;
- u32 flags;
-
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
- fs_matcher = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
- uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
- dev = mlx5_udata_to_mdev(&attrs->driver_udata);
-
- if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
- return -EINVAL;
-
- if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
-
- if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
- if (len) {
- devx_obj = arr_flow_actions[0]->object;
-
- if (uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
-
- int num_offsets = uverbs_attr_ptr_get_array_size(
- attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
- sizeof(u32));
-
- if (num_offsets != 1)
- return -EINVAL;
-
- offset_attr = uverbs_attr_get_alloced_ptr(
- attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
- offset = *offset_attr;
- }
-
- if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset,
- &counter_id))
- return -EINVAL;
-
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- }
-
- cmd_in = uverbs_attr_get_alloced_ptr(
- attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
- inlen = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
-
- uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
- if (!uflow_res)
- return -ENOMEM;
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
- for (i = 0; i < len; i++) {
- struct mlx5_ib_flow_action *maction =
- to_mflow_act(arr_flow_actions[i]->object);
-
- ret = parse_flow_flow_action(maction, false, &flow_act);
- if (ret)
- goto err_out;
- flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
- arr_flow_actions[i]->object);
- }
-
- ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
- MLX5_IB_ATTR_CREATE_FLOW_TAG);
- if (!ret) {
- if (flow_context.flow_tag >= BIT(24)) {
- ret = -EINVAL;
- goto err_out;
- }
- flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
- }
-
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher,
- &flow_context,
- &flow_act,
- counter_id,
- cmd_in, inlen,
- dest_id, dest_type);
- if (IS_ERR(flow_handler)) {
- ret = PTR_ERR(flow_handler);
- goto err_out;
- }
-
- ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
-
- return 0;
-err_out:
- ib_uverbs_flow_resources_free(uflow_res);
- return ret;
-}
-
-static int flow_matcher_cleanup(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_matcher *obj = uobject->object;
- int ret;
-
- ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
- if (ret)
- return ret;
-
- kfree(obj);
- return 0;
-}
-
-static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
- struct mlx5_ib_flow_matcher *obj)
-{
- enum mlx5_ib_uapi_flow_table_type ft_type =
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
- u32 flags;
- int err;
-
- /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
- * users should switch to it. We leave this to not break userspace
- */
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
- uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
- return -EINVAL;
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
- err = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
- if (err)
- return err;
-
- err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
- if (err)
- return err;
-
- return 0;
- }
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
- err = uverbs_get_flags32(&flags, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- IB_FLOW_ATTR_FLAGS_EGRESS);
- if (err)
- return err;
-
- if (flags) {
- mlx5_ib_ft_type_to_namespace(
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
- &obj->ns_type);
- return 0;
- }
- }
-
- obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
- struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- struct mlx5_ib_flow_matcher *obj;
- int err;
-
- obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- obj->mask_len = uverbs_attr_get_len(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- err = uverbs_copy_from(&obj->matcher_mask,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- if (err)
- goto end;
-
- obj->flow_type = uverbs_attr_get_enum_id(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
-
- if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
- err = uverbs_copy_from(&obj->priority,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
- if (err)
- goto end;
- }
-
- err = uverbs_copy_from(&obj->match_criteria_enable,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
- if (err)
- goto end;
-
- err = mlx5_ib_matcher_ns(attrs, obj);
- if (err)
- goto end;
-
- uobj->object = obj;
- obj->mdev = dev->mdev;
- atomic_set(&obj->usecnt, 0);
- return 0;
-
-end:
- kfree(obj);
- return err;
-}
-
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- switch (maction->flow_action_raw.sub_type) {
- case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
- mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.modify_hdr);
- break;
- case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
- mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.pkt_reformat);
- break;
- case MLX5_IB_FLOW_ACTION_DECAP:
- break;
- default:
- break;
- }
-}
-
-static struct ib_flow_action *
-mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
- enum mlx5_ib_uapi_flow_table_type ft_type,
- u8 num_actions, void *in)
-{
- enum mlx5_flow_namespace_type namespace;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ERR_PTR(-EINVAL);
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return ERR_PTR(-ENOMEM);
-
- maction->flow_action_raw.modify_hdr =
- mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
-
- if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
- ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
- kfree(maction);
- return ERR_PTR(ret);
- }
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
- maction->flow_action_raw.dev = dev;
-
- return &maction->ib_action;
-}
-
-static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
-{
- return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- max_modify_header_actions) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- max_modify_header_actions) ||
- MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- max_modify_header_actions);
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct ib_flow_action *action;
- int num_actions;
- void *in;
- int ret;
-
- if (!mlx5_ib_modify_header_supported(mdev))
- return -EOPNOTSUPP;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
-
- num_actions = uverbs_attr_ptr_get_array_size(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
- if (num_actions < 0)
- return num_actions;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
- if (ret)
- return ret;
- action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
-
- return 0;
-}
-
-static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
- u8 packet_reformat_type,
- u8 ft_type)
-{
- switch (packet_reformat_type) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE(ibdev->mdev,
- encap_general_header);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
- reformat_l2_to_l3_tunnel);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
- reformat_l3_tunnel_to_l2);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
- break;
- default:
- break;
- }
-
- return false;
-}
-
-static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
-{
- switch (dv_prt) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int mlx5_ib_flow_action_create_packet_reformat_ctx(
- struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_action *maction,
- u8 ft_type, u8 dv_prt,
- void *in, size_t len)
-{
- enum mlx5_flow_namespace_type namespace;
- u8 prm_prt;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ret;
-
- ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
- if (ret)
- return ret;
-
- maction->flow_action_raw.pkt_reformat =
- mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
- in, namespace);
- if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
- ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
- return ret;
- }
-
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
- maction->flow_action_raw.dev = dev;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
- if (ret)
- return ret;
-
- ret = uverbs_get_const(&dv_prt, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
- if (ret)
- return ret;
-
- if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
- return -EOPNOTSUPP;
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return -ENOMEM;
-
- if (dv_prt ==
- MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_DECAP;
- maction->flow_action_raw.dev = mdev;
- } else {
- void *in;
- int len;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
- if (IS_ERR(in)) {
- ret = PTR_ERR(in);
- goto free_maction;
- }
-
- len = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
-
- ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
- maction, ft_type, dv_prt, in, len);
- if (ret)
- goto free_maction;
- }
-
- uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
- return 0;
-
-free_maction:
- kfree(maction);
- return ret;
-}
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_CREATE_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_READ,
- UA_MANDATORY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
- UVERBS_OBJECT_QP,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_READ, 1,
- MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
- UVERBS_ATTR_TYPE(u32),
- UA_OPTIONAL),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ, 1, 1,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
- UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
- UA_OPTIONAL,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
- enum mlx5_ib_create_flow_flags,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_DESTROY_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(mlx5_ib_fs,
- UVERBS_OBJECT_FLOW,
- &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
- &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
- set_add_copy_action_in_auto)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
- UVERBS_ATTR_MIN_SIZE(1),
- UA_ALLOC_AND_COPY,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
- enum mlx5_ib_uapi_flow_action_packet_reformat_type,
- UA_MANDATORY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(
- mlx5_ib_flow_actions,
- UVERBS_OBJECT_FLOW_ACTION,
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
- mlx5_ib_flow_type,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
- UVERBS_ATTR_TYPE(u8),
- UA_MANDATORY),
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- enum ib_flow_flags,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
-
-const struct uapi_definition mlx5_ib_flow_defs[] = {
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
- MLX5_IB_OBJECT_FLOW_MATCHER),
- UAPI_DEF_CHAIN_OBJ_TREE(
- UVERBS_OBJECT_FLOW,
- &mlx5_ib_fs),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
- &mlx5_ib_flow_actions),
- {},
-};
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
new file mode 100644
index 000000000000..e9cfb9a2ef41
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -0,0 +1,2516 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/accel.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+#include "counters.h"
+#include "devx.h"
+#include "fs.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+enum {
+ MATCH_CRITERIA_ENABLE_OUTER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC_BIT,
+ MATCH_CRITERIA_ENABLE_INNER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
+
+#define HEADER_IS_ZERO(match_criteria, headers) \
+ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
+
+static u8 get_match_criteria_enable(u32 *match_criteria)
+{
+ u8 match_criteria_enable;
+
+ match_criteria_enable =
+ (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+ MATCH_CRITERIA_ENABLE_OUTER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+ MATCH_CRITERIA_ENABLE_MISC_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+ MATCH_CRITERIA_ENABLE_INNER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+ MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+ return match_criteria_enable;
+}
+
+static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ u8 entry_mask;
+ u8 entry_val;
+ int err = 0;
+
+ if (!mask)
+ goto out;
+
+ entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
+ ip_protocol);
+ entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
+ ip_protocol);
+ if (!entry_mask) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ goto out;
+ }
+ /* Don't override existing ip protocol */
+ if (mask != entry_mask || val != entry_val)
+ err = -EINVAL;
+out:
+ return err;
+}
+
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
+ bool inner)
+{
+ if (inner) {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, inner_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, inner_ipv6_flow_label, val);
+ } else {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, outer_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, outer_ipv6_flow_label, val);
+ }
+}
+
+static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
+}
+
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+#define LAST_ETH_FIELD vlan_tag
+#define LAST_IB_FIELD sl
+#define LAST_IPV4_FIELD tos
+#define LAST_IPV6_FIELD traffic_class
+#define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
+
+/* Field is the last supported field */
+#define FIELDS_NOT_SUPPORTED(filter, field)\
+ memchr_inv((void *)&filter.field +\
+ sizeof(filter.field), 0,\
+ sizeof(filter) -\
+ offsetof(typeof(filter), field) -\
+ sizeof(filter.field))
+
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
+{
+
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_ESP:
+ if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
+ return -EINVAL;
+ /* Currently only AES_GCM keymat is supported by the driver */
+ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
+ action->action |= is_egress ?
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
+ return 0;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
+ return 0;
+ }
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action, u32 prev_type)
+{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters);
+ void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters_2);
+ void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters_2);
+ void *headers_c;
+ void *headers_v;
+ int match_ipv;
+ int ret;
+
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+ } else {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ }
+
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
+ case IB_FLOW_SPEC_ETH:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
+ return -EOPNOTSUPP;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv4.mask.proto,
+ ib_spec->ipv4.val.proto))
+ return -EINVAL;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.src_ip,
+ sizeof(ib_spec->ipv6.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.src_ip,
+ sizeof(ib_spec->ipv6.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.dst_ip,
+ sizeof(ib_spec->ipv6.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.dst_ip,
+ sizeof(ib_spec->ipv6.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv6.mask.traffic_class,
+ ib_spec->ipv6.val.traffic_class);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv6.mask.next_hdr,
+ ib_spec->ipv6.val.next_hdr))
+ return -EINVAL;
+
+ set_flow_label(misc_params_c, misc_params_v,
+ ntohl(ib_spec->ipv6.mask.flow_label),
+ ntohl(ib_spec->ipv6.val.flow_label),
+ ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ if (ib_spec->esp.mask.seq)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
+ ntohl(ib_spec->esp.mask.spi));
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ ntohl(ib_spec->esp.val.spi));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (ib_spec->gre.mask.c_ks_res0_ver)
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ IPPROTO_GRE);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+ ntohs(ib_spec->gre.mask.protocol));
+ MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+ ntohs(ib_spec->gre.val.protocol));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.mask.key,
+ sizeof(ib_spec->gre.mask.key));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.val.key,
+ sizeof(ib_spec->gre.val.key));
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ switch (prev_type) {
+ case IB_FLOW_SPEC_UDP:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ default:
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ inner_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ inner_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ } else {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ }
+ }
+ break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+ LAST_TUNNEL_FIELD))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+ ntohl(ib_spec->tunnel.mask.tunnel_id));
+ MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+ ntohl(ib_spec->tunnel.val.tunnel_id));
+ break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
+ if (ret)
+ return ret;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+ LAST_COUNTERS_FIELD))
+ return -EOPNOTSUPP;
+
+ /* for now support only one counters spec per flow */
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return -EINVAL;
+
+ action->counters = ib_spec->flow_count.counters;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* If a flow could catch both multicast and unicast packets,
+ * it won't fall into the multicast flow steering table and this rule
+ * could steal other multicast packets.
+ */
+static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
+{
+ union ib_flow_spec *flow_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
+ return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
+
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
+}
+
+enum valid_spec {
+ VALID_SPEC_INVALID,
+ VALID_SPEC_VALID,
+ VALID_SPEC_NA,
+};
+
+static enum valid_spec
+is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ const u32 *match_c = spec->match_criteria;
+ bool is_crypto =
+ (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
+ bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
+ bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /*
+ * Currently only crypto is supported in egress, when regular egress
+ * rules would be supported, always return VALID_SPEC_NA.
+ */
+ if (!is_crypto)
+ return VALID_SPEC_NA;
+
+ return is_crypto && is_ipsec &&
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
+ VALID_SPEC_VALID : VALID_SPEC_INVALID;
+}
+
+static bool is_valid_spec(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ /* We curretly only support ipsec egress flow */
+ return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
+}
+
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
+ unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
+ ib_spec->eth.mask.ether_type) {
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+ struct mlx5_ib_dev *dev = handler->dev;
+
+ mutex_lock(&dev->flow_db->lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rules(iter->rule);
+ put_flow_table(dev, iter->prio, true);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rules(handler->rule);
+ put_flow_table(dev, handler->prio, true);
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ mutex_unlock(&dev->flow_db->lock);
+ if (handler->flow_matcher)
+ atomic_dec(&handler->flow_matcher->usecnt);
+ kfree(handler);
+
+ return 0;
+}
+
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+ priority *= 2;
+ if (!dont_trap)
+ priority++;
+ return priority;
+}
+
+enum flow_table_type {
+ MLX5_IB_FT_RX,
+ MLX5_IB_FT_TX
+};
+
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+ struct mlx5_ib_flow_prio *prio,
+ int priority,
+ int num_entries, int num_groups,
+ u32 flags)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = num_entries;
+ ft_attr.flags = flags;
+ ft_attr.autogroup.max_num_groups = num_groups;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return ERR_CAST(ft);
+
+ prio->flow_table = ft;
+ prio->refcount = 0;
+ return prio;
+}
+
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr,
+ enum flow_table_type ft_type)
+{
+ bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int max_table_size;
+ int num_entries;
+ int num_groups;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ enum mlx5_flow_namespace_type fn_type;
+
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(flow_attr->priority,
+ dont_trap);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority,
+ &num_entries,
+ &num_groups);
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ allow_sniffer_and_nic_rx_shared_tir))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+ MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+
+ prio = &dev->flow_db->sniffer[ft_type];
+ priority = 0;
+ num_entries = 1;
+ num_groups = 1;
+ }
+
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ max_table_size = min_t(int, num_entries, max_table_size);
+
+ ft = prio->flow_table;
+ if (!ft)
+ return _get_prio(ns, prio, priority, max_table_size, num_groups,
+ flags);
+
+ return prio;
+}
+
+static void set_underlay_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ u32 underlay_qpn)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (underlay_qpn &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ ft_field_support.bth_dst_qp)) {
+ MLX5_SET(fte_match_set_misc,
+ misc_params_v, bth_dst_qp, underlay_qpn);
+ MLX5_SET(fte_match_set_misc,
+ misc_params_c, bth_dst_qp, 0xffffff);
+ }
+}
+
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
+static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst,
+ u32 underlay_qpn,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination dest_arr[2] = {};
+ struct mlx5_flow_destination *rule_dst = dest_arr;
+ const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
+ unsigned int spec_index;
+ u32 prev_type = 0;
+ int err = 0;
+ int dest_num = 0;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+
+ if (!is_valid_attr(dev->mdev, flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ if (dev->is_rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(dev->mdev, spec,
+ ib_flow, flow_attr, &flow_act,
+ prev_type);
+ if (err < 0)
+ goto free;
+
+ prev_type = ((union ib_flow_spec *)ib_flow)->type;
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
+ if (!flow_is_multicast_only(flow_attr))
+ set_underlay_qp(dev, spec, underlay_qpn);
+
+ if (dev->is_rep) {
+ struct mlx5_eswitch_rep *rep;
+
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
+ }
+
+ spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (is_egress &&
+ !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ struct mlx5_ib_mcounters *mcounters;
+
+ err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
+ if (err)
+ goto free;
+
+ mcounters = to_mcounters(flow_act.counters);
+ handler->ibcounters = flow_act.counters;
+ dest_arr[dest_num].type =
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest_arr[dest_num].counter_id =
+ mlx5_fc_id(mcounters->hw_cntrs_hndl);
+ dest_num++;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ if (!dest_num)
+ rule_dst = NULL;
+ } else {
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ flow_act.action |=
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else if (dest_num)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ spec->flow_context.flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act,
+ rule_dst, dest_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err && handler) {
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ kfree(handler);
+ }
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
+}
+
+enum {
+ LEFTOVERS_MC,
+ LEFTOVERS_UC,
+};
+
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_attr flow_attr;
+ struct ib_flow_spec_eth eth_flow;
+ } leftovers_specs[] = {
+ [LEFTOVERS_MC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {0x1} }
+ }
+ },
+ [LEFTOVERS_UC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {} }
+ }
+ }
+ };
+
+ handler = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_MC].flow_attr,
+ dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_UC].flow_attr,
+ dst);
+ if (IS_ERR(handler_ucast)) {
+ mlx5_del_flow_rules(handler->rule);
+ ft_prio->refcount--;
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_rx,
+ struct mlx5_ib_flow_prio *ft_tx,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_rx;
+ struct mlx5_ib_flow_handler *handler_tx;
+ int err;
+ static const struct ib_flow_attr flow_attr = {
+ .num_of_specs = 0,
+ .size = sizeof(flow_attr)
+ };
+
+ handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
+ if (IS_ERR(handler_rx)) {
+ err = PTR_ERR(handler_rx);
+ goto err;
+ }
+
+ handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
+ if (IS_ERR(handler_tx)) {
+ err = PTR_ERR(handler_tx);
+ goto err_tx;
+ }
+
+ list_add(&handler_tx->list, &handler_rx->list);
+
+ return handler_rx;
+
+err_tx:
+ mlx5_del_flow_rules(handler_rx->rule);
+ ft_rx->refcount--;
+ kfree(handler_rx);
+err:
+ return ERR_PTR(err);
+}
+
+
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+ struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+ size_t min_ucmd_sz, required_ucmd_sz;
+ int err;
+ int underlay_qpn;
+
+ if (udata && udata->inlen) {
+ min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+ sizeof(ucmd_hdr.reserved);
+ if (udata->inlen < min_ucmd_sz)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+ if (err)
+ return ERR_PTR(err);
+
+ /* currently supports only one counters data */
+ if (ucmd_hdr.ncounters_data > 1)
+ return ERR_PTR(-EINVAL);
+
+ required_ucmd_sz = min_ucmd_sz +
+ sizeof(struct mlx5_ib_flow_counters_data) *
+ ucmd_hdr.ncounters_data;
+ if (udata->inlen > required_ucmd_sz &&
+ !ib_is_udata_cleared(udata, required_ucmd_sz,
+ udata->inlen - required_ucmd_sz))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+ if (!ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+ if (err)
+ goto free_ucmd;
+ }
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ if (domain != IB_FLOW_DOMAIN_USER ||
+ flow_attr->port > dev->num_ports ||
+ (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
+ IB_FLOW_ATTR_FLAGS_EGRESS))) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+ if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
+ if (IS_ERR(ft_prio_tx)) {
+ err = PTR_ERR(ft_prio_tx);
+ ft_prio_tx = NULL;
+ goto destroy_ft;
+ }
+ }
+
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->is_rss)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ mqp->underlay_qpn :
+ 0;
+ handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
+ underlay_qpn, ucmd);
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
+ } else {
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+ kfree(ucmd);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+ if (ft_prio_tx)
+ put_flow_table(dev, ft_prio_tx, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+free_ucmd:
+ kfree(ucmd);
+ return ERR_PTR(err);
+}
+
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ bool mcast)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio = NULL;
+ int max_table_size = 0;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ if (mcast)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ max_table_size = BIT(
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ priority = FDB_BYPASS_PATH;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
+ }
+
+ max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ prio = &dev->flow_db->prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ prio = &dev->flow_db->egress_prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ prio = &dev->flow_db->fdb;
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+ prio = &dev->flow_db->rdma_rx[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
+ prio = &dev->flow_db->rdma_tx[priority];
+
+ if (!prio)
+ return ERR_PTR(-EINVAL);
+
+ if (prio->flow_table)
+ return prio;
+
+ return _get_prio(ns, prio, priority, max_table_size,
+ MLX5_FS_MAX_TYPES, flags);
+}
+
+static struct mlx5_ib_flow_handler *
+_create_raw_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct mlx5_flow_destination *dst,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
+ struct mlx5_flow_act *flow_act,
+ void *cmd_in, int inlen,
+ int dst_num)
+{
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ memcpy(spec->match_value, cmd_in, inlen);
+ memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
+ fs_matcher->mask_len);
+ spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
+
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ flow_act, dst, dst_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+ ft_prio->flow_table = ft;
+
+free:
+ if (err)
+ kfree(handler);
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
+ void *match_v)
+{
+ void *match_c;
+ void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
+ void *dmac, *dmac_mask;
+ void *ipv4, *ipv4_mask;
+
+ if (!(fs_matcher->match_criteria_enable &
+ (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
+ return false;
+
+ match_c = fs_matcher->matcher_mask.match_params;
+ match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+
+ dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dmac_47_16);
+ dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dmac_47_16);
+
+ if (is_multicast_ether_addr(dmac) &&
+ is_multicast_ether_addr(dmac_mask))
+ return true;
+
+ ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
+ ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
+ return true;
+
+ return false;
+}
+
+static struct mlx5_ib_flow_handler *raw_fs_rule_add(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
+ u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
+{
+ struct mlx5_flow_destination *dst;
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
+ bool mcast;
+ int err;
+
+ if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOMEM);
+
+ dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = _get_flow_table(dev, fs_matcher, mcast);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
+ dst[dst_num].type = dest_type;
+ dst[dst_num++].tir_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num++].ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ }
+
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter_id = counter_id;
+ dst_num++;
+ }
+
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
+ cmd_in, inlen, dst_num);
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ atomic_inc(&fs_matcher->usecnt);
+ handler->flow_matcher = fs_matcher;
+
+ kfree(dst);
+
+ return handler;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+
+ return ERR_PTR(err);
+}
+
+static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
+{
+ u32 flags = 0;
+
+ if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
+ flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
+
+ return flags;
+}
+
+#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \
+ MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
+static struct ib_flow_action *
+mlx5_ib_create_flow_action_esp(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(device);
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
+ struct mlx5_ib_flow_action *action;
+ u64 action_flags;
+ u64 flags;
+ int err = 0;
+
+ err = uverbs_get_flags64(
+ &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
+ if (err)
+ return ERR_PTR(err);
+
+ flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
+
+ /* We current only support a subset of the standard features. Only a
+ * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
+ * (with overlap). Full offload mode isn't supported.
+ */
+ if (!attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->keymat->protocol !=
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ aes_gcm = &attr->keymat->keymat.aes_gcm;
+
+ if (aes_gcm->icv_len != 16 ||
+ aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ action = kmalloc(sizeof(*action), GFP_KERNEL);
+ if (!action)
+ return ERR_PTR(-ENOMEM);
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+ memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
+ sizeof(accel_attrs.keymat.aes_gcm.aes_key));
+ accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
+ memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
+ sizeof(accel_attrs.keymat.aes_gcm.salt));
+ memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
+ sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
+ accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
+ accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
+ accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
+ accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
+
+ action->esp_aes_gcm.ctx =
+ mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
+ if (IS_ERR(action->esp_aes_gcm.ctx)) {
+ err = PTR_ERR(action->esp_aes_gcm.ctx);
+ goto err_parse;
+ }
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+
+ return &action->ib_action;
+
+err_parse:
+ kfree(action);
+ return ERR_PTR(err);
+}
+
+static int
+mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs;
+ int err = 0;
+
+ if (attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
+ return -EOPNOTSUPP;
+
+ /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
+ * be modified.
+ */
+ if (!(maction->esp_aes_gcm.ib_flags &
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
+ attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
+ return -EINVAL;
+
+ memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
+ sizeof(accel_attrs));
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ else
+ accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
+ &accel_attrs);
+ if (err)
+ return err;
+
+ maction->esp_aes_gcm.ib_flags &=
+ ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+ maction->esp_aes_gcm.ib_flags |=
+ attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+
+ return 0;
+}
+
+static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ switch (maction->flow_action_raw.sub_type) {
+ case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+ mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.modify_hdr);
+ break;
+ case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+ mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.pkt_reformat);
+ break;
+ case MLX5_IB_FLOW_ACTION_DECAP:
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_ESP:
+ /*
+ * We only support aes_gcm by now, so we implicitly know this is
+ * the underline crypto.
+ */
+ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
+ break;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ destroy_flow_action_raw(maction);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+ enum mlx5_flow_namespace_type *namespace)
+{
+ switch (table_type) {
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
+ *namespace = MLX5_FLOW_NAMESPACE_FDB;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
+ [MLX5_IB_FLOW_TYPE_NORMAL] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ .u.ptr = {
+ .len = sizeof(u16), /* data is priority */
+ .min_len = sizeof(u16),
+ }
+ },
+ [MLX5_IB_FLOW_TYPE_SNIFFER] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_DESTROY_TIR:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+ obj_id);
+ return true;
+
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+ table_id);
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int get_dests(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
+ int *dest_type, struct ib_qp **qp, u32 *flags)
+{
+ bool dest_devx, dest_qp;
+ void *devx_obj;
+ int err;
+
+ dest_devx = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+ dest_qp = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+
+ *flags = 0;
+ err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
+ if (err)
+ return err;
+
+ /* Both flags are not allowed */
+ if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
+ *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ return -EINVAL;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ if (dest_devx && (dest_qp || *flags))
+ return -EINVAL;
+ else if (dest_qp && *flags)
+ return -EINVAL;
+ }
+
+ /* Allow only DEVX object, drop as dest for FDB */
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
+ (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
+ return -EINVAL;
+
+ /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+ return -EINVAL;
+
+ *qp = NULL;
+ if (dest_devx) {
+ devx_obj =
+ uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+
+ /* Verify that the given DEVX object is a flow
+ * steering destination.
+ */
+ if (!is_flow_dest(devx_obj, dest_id, dest_type))
+ return -EINVAL;
+ /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ return -EINVAL;
+ } else if (dest_qp) {
+ struct mlx5_ib_qp *mqp;
+
+ *qp = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+ if (IS_ERR(*qp))
+ return PTR_ERR(*qp);
+
+ if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ mqp = to_mqp(*qp);
+ if (mqp->is_rss)
+ *dest_id = mqp->rss_qp.tirn;
+ else
+ *dest_id = mqp->raw_packet_qp.rq.tirn;
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ }
+
+ if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+
+ if (offset && offset >= devx_obj->flow_counter_bulk_size)
+ return false;
+
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ *counter_id += offset;
+ return true;
+ }
+
+ return false;
+}
+
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
+static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_flow_context flow_context = {.flow_tag =
+ MLX5_FS_DEFAULT_FLOW_TAG};
+ u32 *offset_attr, offset = 0, counter_id = 0;
+ int dest_id, dest_type = -1, inlen, len, ret, i;
+ struct mlx5_ib_flow_handler *flow_handler;
+ struct mlx5_ib_flow_matcher *fs_matcher;
+ struct ib_uobject **arr_flow_actions;
+ struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
+ struct ib_qp *qp = NULL;
+ void *devx_obj, *cmd_in;
+ struct ib_uobject *uobj;
+ struct mlx5_ib_dev *dev;
+ u32 flags;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ fs_matcher = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+ uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+ if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
+ return -EINVAL;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
+
+ int num_offsets = uverbs_attr_ptr_get_array_size(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ sizeof(u32));
+
+ if (num_offsets != 1)
+ return -EINVAL;
+
+ offset_attr = uverbs_attr_get_alloced_ptr(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
+ offset = *offset_attr;
+ }
+
+ if (!is_flow_counter(devx_obj, offset, &counter_id))
+ return -EINVAL;
+
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
+
+ cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+ inlen = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+
+ uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+ if (!uflow_res)
+ return -ENOMEM;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+ for (i = 0; i < len; i++) {
+ struct mlx5_ib_flow_action *maction =
+ to_mflow_act(arr_flow_actions[i]->object);
+
+ ret = parse_flow_flow_action(maction, false, &flow_act);
+ if (ret)
+ goto err_out;
+ flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+ arr_flow_actions[i]->object);
+ }
+
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG);
+ if (!ret) {
+ if (flow_context.flow_tag >= BIT(24)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+ }
+
+ flow_handler =
+ raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
+ counter_id, cmd_in, inlen, dest_id, dest_type);
+ if (IS_ERR(flow_handler)) {
+ ret = PTR_ERR(flow_handler);
+ goto err_out;
+ }
+
+ ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
+
+ return 0;
+err_out:
+ ib_uverbs_flow_resources_free(uflow_res);
+ return ret;
+}
+
+static int flow_matcher_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_matcher *obj = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ kfree(obj);
+ return 0;
+}
+
+static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *obj)
+{
+ enum mlx5_ib_uapi_flow_table_type ft_type =
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
+ u32 flags;
+ int err;
+
+ /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
+ * users should switch to it. We leave this to not break userspace
+ */
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
+ uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
+ return -EINVAL;
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
+ err = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
+ if (err)
+ return err;
+
+ return 0;
+ }
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ IB_FLOW_ATTR_FLAGS_EGRESS);
+ if (err)
+ return err;
+
+ if (flags) {
+ mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
+ &obj->ns_type);
+ return 0;
+ }
+ }
+
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ struct mlx5_ib_flow_matcher *obj;
+ int err;
+
+ obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->mask_len = uverbs_attr_get_len(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ err = uverbs_copy_from(&obj->matcher_mask,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ if (err)
+ goto end;
+
+ obj->flow_type = uverbs_attr_get_enum_id(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+
+ if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
+ err = uverbs_copy_from(&obj->priority,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+ if (err)
+ goto end;
+ }
+
+ err = uverbs_copy_from(&obj->match_criteria_enable,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
+ if (err)
+ goto end;
+
+ err = mlx5_ib_matcher_ns(attrs, obj);
+ if (err)
+ goto end;
+
+ uobj->object = obj;
+ obj->mdev = dev->mdev;
+ atomic_set(&obj->usecnt, 0);
+ return 0;
+
+end:
+ kfree(obj);
+ return err;
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_uapi_flow_table_type ft_type,
+ u8 num_actions, void *in)
+{
+ enum mlx5_flow_namespace_type namespace;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return ERR_PTR(-ENOMEM);
+
+ maction->flow_action_raw.modify_hdr =
+ mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
+
+ if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+ ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
+ kfree(maction);
+ return ERR_PTR(ret);
+ }
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+ maction->flow_action_raw.dev = dev;
+
+ return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct ib_flow_action *action;
+ int num_actions;
+ void *in;
+ int ret;
+
+ if (!mlx5_ib_modify_header_supported(mdev))
+ return -EOPNOTSUPP;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+ num_actions = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
+ if (num_actions < 0)
+ return num_actions;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+ if (ret)
+ return ret;
+ action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+
+ return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+ u8 packet_reformat_type,
+ u8 ft_type)
+{
+ switch (packet_reformat_type) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+ encap_general_header);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+ reformat_l2_to_l3_tunnel);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+ reformat_l3_tunnel_to_l2);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+ switch (dv_prt) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+ struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_action *maction,
+ u8 ft_type, u8 dv_prt,
+ void *in, size_t len)
+{
+ enum mlx5_flow_namespace_type namespace;
+ u8 prm_prt;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+ if (ret)
+ return ret;
+
+ maction->flow_action_raw.pkt_reformat =
+ mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+ in, namespace);
+ if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+ ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
+ return ret;
+ }
+
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+ maction->flow_action_raw.dev = dev;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&dv_prt, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+ if (ret)
+ return ret;
+
+ if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+ return -EOPNOTSUPP;
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return -ENOMEM;
+
+ if (dv_prt ==
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_DECAP;
+ maction->flow_action_raw.dev = mdev;
+ } else {
+ void *in;
+ int len;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+ if (IS_ERR(in)) {
+ ret = PTR_ERR(in);
+ goto free_maction;
+ }
+
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+ ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+ maction, ft_type, dv_prt, in, len);
+ if (ret)
+ goto free_maction;
+ }
+
+ uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+ return 0;
+
+free_maction:
+ kfree(maction);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_CREATE_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_READ, 1,
+ MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ enum mlx5_ib_create_flow_flags,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DESTROY_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_fs,
+ UVERBS_OBJECT_FLOW,
+ &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+ set_add_copy_action_in_auto)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ UVERBS_ATTR_MIN_SIZE(1),
+ UA_ALLOC_AND_COPY,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+ mlx5_ib_flow_actions,
+ UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ mlx5_ib_flow_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ enum ib_flow_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_FLOW_MATCHER),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_FLOW,
+ &mlx5_ib_fs),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ {},
+};
+
+static const struct ib_device_ops flow_ops = {
+ .create_flow = mlx5_ib_create_flow,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+};
+
+static const struct ib_device_ops flow_ipsec_ops = {
+ .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
+ .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
+};
+
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ ib_set_device_ops(&dev->ib_dev, &flow_ops);
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
new file mode 100644
index 000000000000..ad320adaf321
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_FS_H
+#define _MLX5_IB_FS_H
+
+#include "mlx5_ib.h"
+
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+ return 0;
+}
+#endif
+static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 343a8b8361e7..fbc45a5e76c5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/debugfs.h>
@@ -59,10 +32,13 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "devx.h"
+#include "fs.h"
#include "srq.h"
#include "qp.h"
#include "wr.h"
-#include <linux/mlx5/fs_helpers.h>
+#include "restrack.h"
+#include "counters.h"
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
@@ -311,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
*native_port_num = 1;
port = &ibdev->port[ib_port_num - 1];
- if (!port)
- return NULL;
-
spin_lock(&port->mp.mpi_lock);
mpi = ibdev->port[ib_port_num - 1].mp.mpi;
if (mpi && !mpi->unaffiliate) {
@@ -511,7 +484,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
mdev_port_num);
if (err)
goto out;
- ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet);
+ ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
props->active_width = IB_WIDTH_4X;
@@ -1765,6 +1738,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
mlx5_ib_disable_lb(dev, true, false);
}
+static int set_ucontext_resp(struct ib_ucontext *uctx,
+ struct mlx5_ib_alloc_ucontext_resp *resp)
+{
+ struct ib_device *ibdev = uctx->device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
+ int err;
+
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ err = mlx5_cmd_dump_fill_mkey(dev->mdev,
+ &resp->dump_fill_mkey);
+ if (err)
+ return err;
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+ }
+
+ resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
+ if (dev->wc_support)
+ resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_bf_reg_size);
+ resp->cache_line_size = cache_line_size();
+ resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
+ resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
+ resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp->cqe_version = context->cqe_version;
+ resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev,
+ num_of_uars_per_page) : 1;
+
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ if (mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ }
+
+ resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
+ bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
+ resp->num_ports = dev->num_ports;
+ resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
+
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
+ resp->eth_min_inline++;
+ }
+
+ if (dev->mdev->clock_info)
+ resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+
+ /*
+ * We don't want to expose information from the PCI bar that is located
+ * after 4096 bytes, so if the arch only supports larger pages, let's
+ * pretend we don't support reading the HCA's core clock. This is also
+ * forced by mmap function.
+ */
+ if (PAGE_SIZE <= 4096) {
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp->hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg,
+ internal_timer_h) % PAGE_SIZE;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+
+ resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
+ return 0;
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1772,14 +1831,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
- u32 dump_fill_mkey;
bool lib_uar_4k;
bool lib_uar_dyn;
@@ -1808,37 +1865,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return -EINVAL;
- resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (dev->wc_support)
- resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
- resp.cache_line_size = cache_line_size();
- resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
- resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
- resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
- resp.cqe_version = min_t(__u8,
- (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
- req.max_cqe_version);
- resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
- resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
- if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
- }
-
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
@@ -1887,87 +1913,24 @@ uar_done:
if (err)
goto out_devx;
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
- if (err)
- goto out_mdev;
- }
-
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
- resp.num_ports = dev->num_ports;
-
- if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
- resp.response_length += sizeof(resp.cqe_version);
-
- if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
- MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
- resp.response_length += sizeof(resp.cmds_supp_uhw);
- }
-
- if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) {
- if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
- mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
- resp.eth_min_inline++;
- }
- resp.response_length += sizeof(resp.eth_min_inline);
- }
-
- if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) {
- if (mdev->clock_info)
- resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
- resp.response_length += sizeof(resp.clock_info_versions);
- }
-
- /*
- * We don't want to expose information from the PCI bar that is located
- * after 4096 bytes, so if the arch only supports larger pages, let's
- * pretend we don't support reading the HCA's core clock. This is also
- * forced by mmap function.
- */
- if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
- if (PAGE_SIZE <= 4096) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
- }
- resp.response_length += sizeof(resp.hca_core_clock_offset);
- }
-
- if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
- resp.response_length += sizeof(resp.log_uar_size);
-
- if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
- resp.response_length += sizeof(resp.num_uars_per_page);
-
- if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
- resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
- resp.response_length += sizeof(resp.num_dyn_bfregs);
- }
-
- if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) {
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- resp.dump_fill_mkey = dump_fill_mkey;
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
- }
- resp.response_length += sizeof(resp.dump_fill_mkey);
- }
+ context->cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
- if (MLX5_CAP_GEN(dev->mdev, ece_support))
- resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+ err = set_ucontext_resp(uctx, &resp);
+ if (err)
+ goto out_mdev;
+ resp.response_length = min(udata->outlen, sizeof(resp));
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_mdev;
bfregi->ver = ver;
bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
- context->cqe_version = resp.cqe_version;
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
@@ -2000,6 +1963,29 @@ out_ctx:
return err;
}
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
+ int ret;
+
+ ret = set_ucontext_resp(ibcontext, &uctx_resp);
+ if (ret)
+ return ret;
+
+ uctx_resp.response_length =
+ min_t(size_t,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
+ sizeof(uctx_resp));
+
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ &uctx_resp,
+ sizeof(uctx_resp));
+ return ret;
+}
+
static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2591,1820 +2577,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
-enum {
- MATCH_CRITERIA_ENABLE_OUTER_BIT,
- MATCH_CRITERIA_ENABLE_MISC_BIT,
- MATCH_CRITERIA_ENABLE_INNER_BIT,
- MATCH_CRITERIA_ENABLE_MISC2_BIT
-};
-
-#define HEADER_IS_ZERO(match_criteria, headers) \
- !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
- 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
-
-static u8 get_match_criteria_enable(u32 *match_criteria)
-{
- u8 match_criteria_enable;
-
- match_criteria_enable =
- (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
- MATCH_CRITERIA_ENABLE_OUTER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
- MATCH_CRITERIA_ENABLE_MISC_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
- MATCH_CRITERIA_ENABLE_INNER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
- MATCH_CRITERIA_ENABLE_MISC2_BIT;
-
- return match_criteria_enable;
-}
-
-static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- u8 entry_mask;
- u8 entry_val;
- int err = 0;
-
- if (!mask)
- goto out;
-
- entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
- ip_protocol);
- entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
- ip_protocol);
- if (!entry_mask) {
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
- goto out;
- }
- /* Don't override existing ip protocol */
- if (mask != entry_mask || val != entry_val)
- err = -EINVAL;
-out:
- return err;
-}
-
-static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
- bool inner)
-{
- if (inner) {
- MLX5_SET(fte_match_set_misc,
- misc_c, inner_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, inner_ipv6_flow_label, val);
- } else {
- MLX5_SET(fte_match_set_misc,
- misc_c, outer_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, outer_ipv6_flow_label, val);
- }
-}
-
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
-}
-
-static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
-{
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD tos
-#define LAST_IPV6_FIELD traffic_class
-#define LAST_TCP_UDP_FIELD src_port
-#define LAST_TUNNEL_FIELD tunnel_id
-#define LAST_FLOW_TAG_FIELD tag_id
-#define LAST_DROP_FIELD size
-#define LAST_COUNTERS_FIELD counters
-
-/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
-
-int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
- bool is_egress,
- struct mlx5_flow_act *action)
-{
-
- switch (maction->ib_action.type) {
- case IB_FLOW_ACTION_ESP:
- if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
- return -EINVAL;
- /* Currently only AES_GCM keymat is supported by the driver */
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
- return 0;
- case IB_FLOW_ACTION_UNSPECIFIED:
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- action->modify_hdr =
- maction->flow_action_raw.modify_hdr;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_DECAP) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
- if (action->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- return -EINVAL;
- action->action |=
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- action->pkt_reformat =
- maction->flow_action_raw.pkt_reformat;
- return 0;
- }
- /* fall through */
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int parse_flow_attr(struct mlx5_core_dev *mdev,
- struct mlx5_flow_spec *spec,
- const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action, u32 prev_type)
-{
- struct mlx5_flow_context *flow_context = &spec->flow_context;
- u32 *match_c = spec->match_criteria;
- u32 *match_v = spec->match_value;
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters);
- void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters_2);
- void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters_2);
- void *headers_c;
- void *headers_v;
- int match_ipv;
- int ret;
-
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- inner_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version);
- } else {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- }
-
- switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
- case IB_FLOW_SPEC_ETH:
- if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -EOPNOTSUPP;
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dmac_47_16),
- ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16),
- ib_spec->eth.val.dst_mac);
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- smac_47_16),
- ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- smac_47_16),
- ib_spec->eth.val.src_mac);
-
- if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- cvlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- cvlan_tag, 1);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_vid, ntohs(ib_spec->eth.val.vlan_tag));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_cfi,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_cfi,
- ntohs(ib_spec->eth.val.vlan_tag) >> 12);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_prio,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_prio,
- ntohs(ib_spec->eth.val.vlan_tag) >> 13);
- }
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ntohs(ib_spec->eth.val.ether_type));
- break;
- case IB_FLOW_SPEC_IPV4:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV4_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.src_ip,
- sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.src_ip,
- sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.dst_ip,
- sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.dst_ip,
- sizeof(ib_spec->ipv4.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto,
- ib_spec->ipv4.val.proto))
- return -EINVAL;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV6_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.src_ip,
- sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.src_ip,
- sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.dst_ip,
- sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.dst_ip,
- sizeof(ib_spec->ipv6.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv6.mask.traffic_class,
- ib_spec->ipv6.val.traffic_class);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr))
- return -EINVAL;
-
- set_flow_label(misc_params_c, misc_params_v,
- ntohl(ib_spec->ipv6.mask.flow_label),
- ntohl(ib_spec->ipv6.val.flow_label),
- ib_spec->type & IB_FLOW_SPEC_INNER);
- break;
- case IB_FLOW_SPEC_ESP:
- if (ib_spec->esp.mask.seq)
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
- ntohl(ib_spec->esp.mask.spi));
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- ntohl(ib_spec->esp.val.spi));
- break;
- case IB_FLOW_SPEC_TCP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_UDP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_GRE:
- if (ib_spec->gre.mask.c_ks_res0_ver)
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_GRE);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
- ntohs(ib_spec->gre.mask.protocol));
- MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
- ntohs(ib_spec->gre.val.protocol));
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- gre_key.nvgre.hi),
- &ib_spec->gre.mask.key,
- sizeof(ib_spec->gre.mask.key));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
- gre_key.nvgre.hi),
- &ib_spec->gre.val.key,
- sizeof(ib_spec->gre.val.key));
- break;
- case IB_FLOW_SPEC_MPLS:
- switch (prev_type) {
- case IB_FLOW_SPEC_UDP:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- case IB_FLOW_SPEC_GRE:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- default:
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- inner_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- inner_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- } else {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- }
- }
- break;
- case IB_FLOW_SPEC_VXLAN_TUNNEL:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
- LAST_TUNNEL_FIELD))
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
- ntohl(ib_spec->tunnel.mask.tunnel_id));
- MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
- ntohl(ib_spec->tunnel.val.tunnel_id));
- break;
- case IB_FLOW_SPEC_ACTION_TAG:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
- LAST_FLOW_TAG_FIELD))
- return -EOPNOTSUPP;
- if (ib_spec->flow_tag.tag_id >= BIT(24))
- return -EINVAL;
-
- flow_context->flow_tag = ib_spec->flow_tag.tag_id;
- flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
- break;
- case IB_FLOW_SPEC_ACTION_DROP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
- LAST_DROP_FIELD))
- return -EOPNOTSUPP;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- break;
- case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
- flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
- if (ret)
- return ret;
- break;
- case IB_FLOW_SPEC_ACTION_COUNT:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
- LAST_COUNTERS_FIELD))
- return -EOPNOTSUPP;
-
- /* for now support only one counters spec per flow */
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- return -EINVAL;
-
- action->counters = ib_spec->flow_count.counters;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* If a flow could catch both multicast and unicast packets,
- * it won't fall into the multicast flow steering table and this rule
- * could steal other multicast packets.
- */
-static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
-{
- union ib_flow_spec *flow_spec;
-
- if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->num_of_specs < 1)
- return false;
-
- flow_spec = (union ib_flow_spec *)(ib_attr + 1);
- if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
- struct ib_flow_spec_ipv4 *ipv4_spec;
-
- ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
- if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
- return true;
-
- return false;
- }
-
- if (flow_spec->type == IB_FLOW_SPEC_ETH) {
- struct ib_flow_spec_eth *eth_spec;
-
- eth_spec = (struct ib_flow_spec_eth *)flow_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
- }
-
- return false;
-}
-
-enum valid_spec {
- VALID_SPEC_INVALID,
- VALID_SPEC_VALID,
- VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- const u32 *match_c = spec->match_criteria;
- bool is_crypto =
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /*
- * Currently only crypto is supported in egress, when regular egress
- * rules would be supported, always return VALID_SPEC_NA.
- */
- if (!is_crypto)
- return VALID_SPEC_NA;
-
- return is_crypto && is_ipsec &&
- (!egress || (!is_drop &&
- !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- /* We curretly only support ipsec egress flow */
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
-static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr,
- bool check_inner)
-{
- union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- int match_ipv = check_inner ?
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version) :
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
- bool ipv4_spec_valid, ipv6_spec_valid;
- unsigned int ip_spec_type = 0;
- bool has_ethertype = false;
- unsigned int spec_index;
- bool mask_valid = true;
- u16 eth_type = 0;
- bool type_valid;
-
- /* Validate that ethertype is correct */
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
- ib_spec->eth.mask.ether_type) {
- mask_valid = (ib_spec->eth.mask.ether_type ==
- htons(0xffff));
- has_ethertype = true;
- eth_type = ntohs(ib_spec->eth.val.ether_type);
- } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
- (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
- ip_spec_type = ib_spec->type;
- }
- ib_spec = (void *)ib_spec + ib_spec->size;
- }
-
- type_valid = (!has_ethertype) || (!ip_spec_type);
- if (!type_valid && mask_valid) {
- ipv4_spec_valid = (eth_type == ETH_P_IP) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
- ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
-
- type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
- (((eth_type == ETH_P_MPLS_UC) ||
- (eth_type == ETH_P_MPLS_MC)) && match_ipv);
- }
-
- return type_valid;
-}
-
-static bool is_valid_attr(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr)
-{
- return is_valid_ethertype(mdev, flow_attr, false) &&
- is_valid_ethertype(mdev, flow_attr, true);
-}
-
-static void put_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *prio, bool ft_added)
-{
- prio->refcount -= !!ft_added;
- if (!prio->refcount) {
- mlx5_destroy_flow_table(prio->flow_table);
- prio->flow_table = NULL;
- }
-}
-
-static void counters_clear_description(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- mutex_lock(&mcounters->mcntrs_mutex);
- kfree(mcounters->counters_data);
- mcounters->counters_data = NULL;
- mcounters->cntrs_max_index = 0;
- mutex_unlock(&mcounters->mcntrs_mutex);
-}
-
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
-{
- struct mlx5_ib_flow_handler *handler = container_of(flow_id,
- struct mlx5_ib_flow_handler,
- ibflow);
- struct mlx5_ib_flow_handler *iter, *tmp;
- struct mlx5_ib_dev *dev = handler->dev;
-
- mutex_lock(&dev->flow_db->lock);
-
- list_for_each_entry_safe(iter, tmp, &handler->list, list) {
- mlx5_del_flow_rules(iter->rule);
- put_flow_table(dev, iter->prio, true);
- list_del(&iter->list);
- kfree(iter);
- }
-
- mlx5_del_flow_rules(handler->rule);
- put_flow_table(dev, handler->prio, true);
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
-
- mutex_unlock(&dev->flow_db->lock);
- if (handler->flow_matcher)
- atomic_dec(&handler->flow_matcher->usecnt);
- kfree(handler);
-
- return 0;
-}
-
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
-{
- priority *= 2;
- if (!dont_trap)
- priority++;
- return priority;
-}
-
-enum flow_table_type {
- MLX5_IB_FT_RX,
- MLX5_IB_FT_TX
-};
-
-#define MLX5_FS_MAX_TYPES 6
-#define MLX5_FS_MAX_ENTRIES BIT(16)
-
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
- struct mlx5_ib_flow_prio *prio,
- int priority,
- int num_entries, int num_groups,
- u32 flags)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_flow_table *ft;
-
- ft_attr.prio = priority;
- ft_attr.max_fte = num_entries;
- ft_attr.flags = flags;
- ft_attr.autogroup.max_num_groups = num_groups;
- ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
- if (IS_ERR(ft))
- return ERR_CAST(ft);
-
- prio->flow_table = ft;
- prio->refcount = 0;
- return prio;
-}
-
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
- struct ib_flow_attr *flow_attr,
- enum flow_table_type ft_type)
-{
- bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio;
- struct mlx5_flow_table *ft;
- int max_table_size;
- int num_entries;
- int num_groups;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- enum mlx5_flow_namespace_type fn_type;
-
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(flow_attr->priority,
- dont_trap);
- if (ft_type == MLX5_IB_FT_RX) {
- fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
- prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- log_max_ft_size));
- fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
- prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- }
- ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
- num_entries = MLX5_FS_MAX_ENTRIES;
- num_groups = MLX5_FS_MAX_TYPES;
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- ns = mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
- prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- if (!MLX5_CAP_FLOWTABLE(dev->mdev,
- allow_sniffer_and_nic_rx_shared_tir))
- return ERR_PTR(-ENOTSUPP);
-
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
-
- prio = &dev->flow_db->sniffer[ft_type];
- priority = 0;
- num_entries = 1;
- num_groups = 1;
- }
-
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- max_table_size = min_t(int, num_entries, max_table_size);
-
- ft = prio->flow_table;
- if (!ft)
- return _get_prio(ns, prio, priority, max_table_size, num_groups,
- flags);
-
- return prio;
-}
-
-static void set_underlay_qp(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- u32 underlay_qpn)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- spec->match_criteria,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- if (underlay_qpn &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- ft_field_support.bth_dst_qp)) {
- MLX5_SET(fte_match_set_misc,
- misc_params_v, bth_dst_qp, underlay_qpn);
- MLX5_SET(fte_match_set_misc,
- misc_params_c, bth_dst_qp, 0xffffff);
- }
-}
-
-static int read_flow_counters(struct ib_device *ibdev,
- struct mlx5_read_counters_attr *read_attr)
-{
- struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_fc_query(dev->mdev, fc,
- &read_attr->out[IB_COUNTER_PACKETS],
- &read_attr->out[IB_COUNTER_BYTES]);
-}
-
-/* flow counters currently expose two counters packets and bytes */
-#define FLOW_COUNTERS_NUM 2
-static int counters_set_description(struct ib_counters *counters,
- enum mlx5_ib_counters_type counters_type,
- struct mlx5_ib_flow_counters_desc *desc_data,
- u32 ncounters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- u32 cntrs_max_index = 0;
- int i;
-
- if (counters_type != MLX5_IB_COUNTERS_FLOW)
- return -EINVAL;
-
- /* init the fields for the object */
- mcounters->type = counters_type;
- mcounters->read_counters = read_flow_counters;
- mcounters->counters_num = FLOW_COUNTERS_NUM;
- mcounters->ncounters = ncounters;
- /* each counter entry have both description and index pair */
- for (i = 0; i < ncounters; i++) {
- if (desc_data[i].description > IB_COUNTER_BYTES)
- return -EINVAL;
-
- if (cntrs_max_index <= desc_data[i].index)
- cntrs_max_index = desc_data[i].index + 1;
- }
-
- mutex_lock(&mcounters->mcntrs_mutex);
- mcounters->counters_data = desc_data;
- mcounters->cntrs_max_index = cntrs_max_index;
- mutex_unlock(&mcounters->mcntrs_mutex);
-
- return 0;
-}
-
-#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
-static int flow_counters_set_data(struct ib_counters *ibcounters,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
- struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
- struct mlx5_ib_flow_counters_desc *desc_data = NULL;
- bool hw_hndl = false;
- int ret = 0;
-
- if (ucmd && ucmd->ncounters_data != 0) {
- cntrs_data = ucmd->data;
- if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
- return -EINVAL;
-
- desc_data = kcalloc(cntrs_data->ncounters,
- sizeof(*desc_data),
- GFP_KERNEL);
- if (!desc_data)
- return -ENOMEM;
-
- if (copy_from_user(desc_data,
- u64_to_user_ptr(cntrs_data->counters_data),
- sizeof(*desc_data) * cntrs_data->ncounters)) {
- ret = -EFAULT;
- goto free;
- }
- }
-
- if (!mcounters->hw_cntrs_hndl) {
- mcounters->hw_cntrs_hndl = mlx5_fc_create(
- to_mdev(ibcounters->device)->mdev, false);
- if (IS_ERR(mcounters->hw_cntrs_hndl)) {
- ret = PTR_ERR(mcounters->hw_cntrs_hndl);
- goto free;
- }
- hw_hndl = true;
- }
-
- if (desc_data) {
- /* counters already bound to at least one flow */
- if (mcounters->cntrs_max_index) {
- ret = -EINVAL;
- goto free_hndl;
- }
-
- ret = counters_set_description(ibcounters,
- MLX5_IB_COUNTERS_FLOW,
- desc_data,
- cntrs_data->ncounters);
- if (ret)
- goto free_hndl;
-
- } else if (!mcounters->cntrs_max_index) {
- /* counters not bound yet, must have udata passed */
- ret = -EINVAL;
- goto free_hndl;
- }
-
- return 0;
-
-free_hndl:
- if (hw_hndl) {
- mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
- mcounters->hw_cntrs_hndl);
- mcounters->hw_cntrs_hndl = NULL;
- }
-free:
- kfree(desc_data);
- return ret;
-}
-
-static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- struct mlx5_eswitch_rep *rep)
-{
- struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
- void *misc;
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(esw,
- rep->vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_mask());
- } else {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
-
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- }
-}
-
-static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst,
- u32 underlay_qpn,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_destination dest_arr[2] = {};
- struct mlx5_flow_destination *rule_dst = dest_arr;
- const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
- unsigned int spec_index;
- u32 prev_type = 0;
- int err = 0;
- int dest_num = 0;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
-
- if (!is_valid_attr(dev->mdev, flow_attr))
- return ERR_PTR(-EINVAL);
-
- if (dev->is_rep && is_egress)
- return ERR_PTR(-EINVAL);
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec,
- ib_flow, flow_attr, &flow_act,
- prev_type);
- if (err < 0)
- goto free;
-
- prev_type = ((union ib_flow_spec *)ib_flow)->type;
- ib_flow += ((union ib_flow_spec *)ib_flow)->size;
- }
-
- if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
-
- if (!flow_is_multicast_only(flow_attr))
- set_underlay_qp(dev, spec, underlay_qpn);
-
- if (dev->is_rep) {
- struct mlx5_eswitch_rep *rep;
-
- rep = dev->port[flow_attr->port - 1].rep;
- if (!rep) {
- err = -EINVAL;
- goto free;
- }
-
- mlx5_ib_set_rule_source_port(dev, spec, rep);
- }
-
- spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-
- if (is_egress &&
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
- err = -EINVAL;
- goto free;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- struct mlx5_ib_mcounters *mcounters;
-
- err = flow_counters_set_data(flow_act.counters, ucmd);
- if (err)
- goto free;
-
- mcounters = to_mcounters(flow_act.counters);
- handler->ibcounters = flow_act.counters;
- dest_arr[dest_num].type =
- MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter_id =
- mlx5_fc_id(mcounters->hw_cntrs_hndl);
- dest_num++;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!dest_num)
- rule_dst = NULL;
- } else {
- if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
- flow_act.action |=
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- if (is_egress)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- else if (dest_num)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- }
-
- if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- spec->flow_context.flow_tag, flow_attr->type);
- err = -EINVAL;
- goto free;
- }
- handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act,
- rule_dst, dest_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
-
- ft_prio->flow_table = ft;
-free:
- if (err && handler) {
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
- kfree(handler);
- }
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
-}
-
-enum {
- LEFTOVERS_MC,
- LEFTOVERS_UC,
-};
-
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_ucast = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- static struct {
- struct ib_flow_attr flow_attr;
- struct ib_flow_spec_eth eth_flow;
- } leftovers_specs[] = {
- [LEFTOVERS_MC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {0x1} }
- }
- },
- [LEFTOVERS_UC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {} }
- }
- }
- };
-
- handler = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
- dst);
- if (!IS_ERR(handler) &&
- flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
- handler_ucast = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
- dst);
- if (IS_ERR(handler_ucast)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_ucast;
- } else {
- list_add(&handler_ucast->list, &handler->list);
- }
- }
-
- return handler;
-}
-
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_rx,
- struct mlx5_ib_flow_prio *ft_tx,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_rx;
- struct mlx5_ib_flow_handler *handler_tx;
- int err;
- static const struct ib_flow_attr flow_attr = {
- .num_of_specs = 0,
- .size = sizeof(flow_attr)
- };
-
- handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
- if (IS_ERR(handler_rx)) {
- err = PTR_ERR(handler_rx);
- goto err;
- }
-
- handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
- if (IS_ERR(handler_tx)) {
- err = PTR_ERR(handler_tx);
- goto err_tx;
- }
-
- list_add(&handler_tx->list, &handler_rx->list);
-
- return handler_rx;
-
-err_tx:
- mlx5_del_flow_rules(handler_rx->rule);
- ft_rx->refcount--;
- kfree(handler_rx);
-err:
- return ERR_PTR(err);
-}
-
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain,
- struct ib_udata *udata)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_qp *mqp = to_mqp(qp);
- struct mlx5_ib_flow_handler *handler = NULL;
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
- struct mlx5_ib_flow_prio *ft_prio;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
- struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
- size_t min_ucmd_sz, required_ucmd_sz;
- int err;
- int underlay_qpn;
-
- if (udata && udata->inlen) {
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
- sizeof(ucmd_hdr.reserved);
- if (udata->inlen < min_ucmd_sz)
- return ERR_PTR(-EOPNOTSUPP);
-
- err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
- if (err)
- return ERR_PTR(err);
-
- /* currently supports only one counters data */
- if (ucmd_hdr.ncounters_data > 1)
- return ERR_PTR(-EINVAL);
-
- required_ucmd_sz = min_ucmd_sz +
- sizeof(struct mlx5_ib_flow_counters_data) *
- ucmd_hdr.ncounters_data;
- if (udata->inlen > required_ucmd_sz &&
- !ib_is_udata_cleared(udata, required_ucmd_sz,
- udata->inlen - required_ucmd_sz))
- return ERR_PTR(-EOPNOTSUPP);
-
- ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
- if (!ucmd)
- return ERR_PTR(-ENOMEM);
-
- err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
- if (err)
- goto free_ucmd;
- }
-
- if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- if (is_egress &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
- if (!dst) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = get_flow_table(dev, flow_attr,
- is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
- if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
- if (IS_ERR(ft_prio_tx)) {
- err = PTR_ERR(ft_prio_tx);
- ft_prio_tx = NULL;
- goto destroy_ft;
- }
- }
-
- if (is_egress) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- } else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->is_rss)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
- }
-
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
- mqp->underlay_qpn :
- 0;
- handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
- underlay_qpn, ucmd);
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
- err = -EINVAL;
- goto destroy_ft;
- }
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- handler = NULL;
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
- kfree(ucmd);
-
- return &handler->ibflow;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
- if (ft_prio_tx)
- put_flow_table(dev, ft_prio_tx, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-free_ucmd:
- kfree(ucmd);
- return ERR_PTR(err);
-}
-
-static struct mlx5_ib_flow_prio *
-_get_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- bool mcast)
-{
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio = NULL;
- int max_table_size = 0;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- if (mcast)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(fs_matcher->priority, false);
-
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2) &&
- !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- max_table_size = BIT(
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
- max_table_size = BIT(
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
- esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- priority = FDB_BYPASS_PATH;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- }
-
- max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
-
- ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
- prio = &dev->flow_db->prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- prio = &dev->flow_db->egress_prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
- prio = &dev->flow_db->fdb;
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
- prio = &dev->flow_db->rdma_rx[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
- prio = &dev->flow_db->rdma_tx[priority];
-
- if (!prio)
- return ERR_PTR(-EINVAL);
-
- if (prio->flow_table)
- return prio;
-
- return _get_prio(ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
-}
-
-static struct mlx5_ib_flow_handler *
-_create_raw_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct mlx5_flow_destination *dst,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen,
- int dst_num)
-{
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- int err = 0;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- memcpy(spec->match_value, cmd_in, inlen);
- memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
- fs_matcher->mask_len);
- spec->match_criteria_enable = fs_matcher->match_criteria_enable;
- spec->flow_context = *flow_context;
-
- handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, dst_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
- ft_prio->flow_table = ft;
-
-free:
- if (err)
- kfree(handler);
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
- void *match_v)
-{
- void *match_c;
- void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
- void *dmac, *dmac_mask;
- void *ipv4, *ipv4_mask;
-
- if (!(fs_matcher->match_criteria_enable &
- (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
- return false;
-
- match_c = fs_matcher->matcher_mask.match_params;
- match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
-
- dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dmac_47_16);
- dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dmac_47_16);
-
- if (is_multicast_ether_addr(dmac) &&
- is_multicast_ether_addr(dmac_mask))
- return true;
-
- ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
- ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
- return true;
-
- return false;
-}
-
-struct mlx5_ib_flow_handler *
-mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- u32 counter_id,
- void *cmd_in, int inlen, int dest_id,
- int dest_type)
-{
- struct mlx5_flow_destination *dst;
- struct mlx5_ib_flow_prio *ft_prio;
- struct mlx5_ib_flow_handler *handler;
- int dst_num = 0;
- bool mcast;
- int err;
-
- if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOMEM);
-
- dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
-
- mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = _get_flow_table(dev, fs_matcher, mcast);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
-
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst[dst_num].type = dest_type;
- dst[dst_num++].tir_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst[dst_num++].ft_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
- dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- }
-
-
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst[dst_num].counter_id = counter_id;
- dst_num++;
- }
-
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
- flow_context, flow_act,
- cmd_in, inlen, dst_num);
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- atomic_inc(&fs_matcher->usecnt);
- handler->flow_matcher = fs_matcher;
-
- kfree(dst);
-
- return handler;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-
- return ERR_PTR(err);
-}
-
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
- u32 flags = 0;
-
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
- return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dev *mdev = to_mdev(device);
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
- struct mlx5_ib_flow_action *action;
- u64 action_flags;
- u64 flags;
- int err = 0;
-
- err = uverbs_get_flags64(
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
- if (err)
- return ERR_PTR(err);
-
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
- /* We current only support a subset of the standard features. Only a
- * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
- * (with overlap). Full offload mode isn't supported.
- */
- if (!attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (attr->keymat->protocol !=
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
- return ERR_PTR(-EOPNOTSUPP);
-
- aes_gcm = &attr->keymat->keymat.aes_gcm;
-
- if (aes_gcm->icv_len != 16 ||
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return ERR_PTR(-EOPNOTSUPP);
-
- action = kmalloc(sizeof(*action), GFP_KERNEL);
- if (!action)
- return ERR_PTR(-ENOMEM);
-
- action->esp_aes_gcm.ib_flags = attr->flags;
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
- sizeof(accel_attrs.keymat.aes_gcm.salt));
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
- action->esp_aes_gcm.ctx =
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
- err = PTR_ERR(action->esp_aes_gcm.ctx);
- goto err_parse;
- }
-
- action->esp_aes_gcm.ib_flags = attr->flags;
-
- return &action->ib_action;
-
-err_parse:
- kfree(action);
- return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
- int err = 0;
-
- if (attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
- return -EOPNOTSUPP;
-
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
- * be modified.
- */
- if (!(maction->esp_aes_gcm.ib_flags &
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
- return -EINVAL;
-
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
- sizeof(accel_attrs));
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
- else
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
- &accel_attrs);
- if (err)
- return err;
-
- maction->esp_aes_gcm.ib_flags &=
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
- maction->esp_aes_gcm.ib_flags |=
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
- return 0;
-}
-
-static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
-
- switch (action->type) {
- case IB_FLOW_ACTION_ESP:
- /*
- * We only support aes_gcm by now, so we implicitly know this is
- * the underline crypto.
- */
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
- break;
- case IB_FLOW_ACTION_UNSPECIFIED:
- mlx5_ib_destroy_flow_action_raw(maction);
- break;
- default:
- WARN_ON(true);
- break;
- }
-
- kfree(maction);
- return 0;
-}
-
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4848,137 +3020,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
return __get_port_caps(dev, port);
}
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
-{
- int err;
-
- err = mlx5_mr_cache_cleanup(dev);
- if (err)
- mlx5_ib_warn(dev, "mr cache cleanup failed\n");
-
- if (dev->umrc.qp)
- mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
- if (dev->umrc.cq)
- ib_free_cq(dev->umrc.cq);
- if (dev->umrc.pd)
- ib_dealloc_pd(dev->umrc.pd);
-}
-
-enum {
- MAX_UMR_WR = 128,
-};
-
-static int create_umr_res(struct mlx5_ib_dev *dev)
-{
- struct ib_qp_init_attr *init_attr = NULL;
- struct ib_qp_attr *attr = NULL;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
- int ret;
-
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
- init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto error_0;
- }
-
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- ret = PTR_ERR(pd);
- goto error_0;
- }
-
- cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
- if (IS_ERR(cq)) {
- mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
- ret = PTR_ERR(cq);
- goto error_2;
- }
-
- init_attr->send_cq = cq;
- init_attr->recv_cq = cq;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->cap.max_send_wr = MAX_UMR_WR;
- init_attr->cap.max_send_sge = 1;
- init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
- init_attr->port_num = 1;
- qp = mlx5_ib_create_qp(pd, init_attr, NULL);
- if (IS_ERR(qp)) {
- mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
- ret = PTR_ERR(qp);
- goto error_3;
- }
- qp->device = &dev->ib_dev;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = MLX5_IB_QPT_REG_UMR;
- qp->send_cq = init_attr->send_cq;
- qp->recv_cq = init_attr->recv_cq;
-
- attr->qp_state = IB_QPS_INIT;
- attr->port_num = 1;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
- IB_QP_PORT, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTR;
- attr->path_mtu = IB_MTU_256;
-
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTS;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
- goto error_4;
- }
-
- dev->umrc.qp = qp;
- dev->umrc.cq = cq;
- dev->umrc.pd = pd;
-
- sema_init(&dev->umrc.sem, MAX_UMR_WR);
- ret = mlx5_mr_cache_init(dev);
- if (ret) {
- mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- goto error_4;
- }
-
- kfree(attr);
- kfree(init_attr);
-
- return 0;
-
-error_4:
- mlx5_ib_destroy_qp(qp, NULL);
- dev->umrc.qp = NULL;
-
-error_3:
- ib_free_cq(cq);
- dev->umrc.cq = NULL;
-
-error_2:
- ib_dealloc_pd(pd);
- dev->umrc.pd = NULL;
-
-error_0:
- kfree(attr);
- kfree(init_attr);
- return ret;
-}
-
static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
switch (umr_fence_cap) {
@@ -4991,18 +3032,20 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
}
}
-static int create_dev_resources(struct mlx5_ib_resources *devr)
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
struct ib_srq_init_attr attr;
- struct mlx5_ib_dev *dev;
struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
int port;
int ret = 0;
- dev = container_of(devr, struct mlx5_ib_dev, devr);
ibdev = &dev->ib_dev;
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
+
mutex_init(&devr->mutex);
devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
@@ -5030,34 +3073,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
if (ret)
goto err_create_cq;
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x0)) {
- ret = PTR_ERR(devr->x0);
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
goto error2;
- }
- devr->x0->device = &dev->ib_dev;
- devr->x0->inode = NULL;
- atomic_set(&devr->x0->usecnt, 0);
- mutex_init(&devr->x0->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
-
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x1)) {
- ret = PTR_ERR(devr->x1);
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret)
goto error3;
- }
- devr->x1->device = &dev->ib_dev;
- devr->x1->inode = NULL;
- atomic_set(&devr->x1->usecnt, 0);
- mutex_init(&devr->x1->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
attr.ext.cq = devr->c0;
- attr.ext.xrc.xrcd = devr->x0;
devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
if (!devr->s0) {
@@ -5068,13 +3096,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
devr->s0->srq_type = IB_SRQT_XRC;
- devr->s0->ext.xrc.xrcd = devr->x0;
devr->s0->ext.cq = devr->c0;
ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
if (ret)
goto err_create;
- atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
@@ -5116,9 +3142,9 @@ error5:
err_create:
kfree(devr->s0);
error4:
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
error3:
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
error2:
mlx5_ib_destroy_cq(devr->c0, NULL);
err_create_cq:
@@ -5130,16 +3156,17 @@ error0:
return ret;
}
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
int port;
mlx5_ib_destroy_srq(devr->s1, NULL);
kfree(devr->s1);
mlx5_ib_destroy_srq(devr->s0, NULL);
kfree(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
mlx5_ib_destroy_cq(devr->c0, NULL);
kfree(devr->c0);
mlx5_ib_dealloc_pd(devr->p0, NULL);
@@ -5332,466 +3359,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-struct mlx5_ib_counter {
- const char *name;
- size_t offset;
-};
-
-#define INIT_Q_COUNTER(_name) \
- { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-
-static const struct mlx5_ib_counter basic_q_cnts[] = {
- INIT_Q_COUNTER(rx_write_requests),
- INIT_Q_COUNTER(rx_read_requests),
- INIT_Q_COUNTER(rx_atomic_requests),
- INIT_Q_COUNTER(out_of_buffer),
-};
-
-static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
- INIT_Q_COUNTER(out_of_sequence),
-};
-
-static const struct mlx5_ib_counter retrans_q_cnts[] = {
- INIT_Q_COUNTER(duplicate_request),
- INIT_Q_COUNTER(rnr_nak_retry_err),
- INIT_Q_COUNTER(packet_seq_err),
- INIT_Q_COUNTER(implied_nak_seq_err),
- INIT_Q_COUNTER(local_ack_timeout_err),
-};
-
-#define INIT_CONG_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
-
-static const struct mlx5_ib_counter cong_cnts[] = {
- INIT_CONG_COUNTER(rp_cnp_ignored),
- INIT_CONG_COUNTER(rp_cnp_handled),
- INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
- INIT_CONG_COUNTER(np_cnp_sent),
-};
-
-static const struct mlx5_ib_counter extended_err_cnts[] = {
- INIT_Q_COUNTER(resp_local_length_error),
- INIT_Q_COUNTER(resp_cqe_error),
- INIT_Q_COUNTER(req_cqe_error),
- INIT_Q_COUNTER(req_remote_invalid_request),
- INIT_Q_COUNTER(req_remote_access_errors),
- INIT_Q_COUNTER(resp_remote_access_errors),
- INIT_Q_COUNTER(resp_cqe_flush_error),
- INIT_Q_COUNTER(req_cqe_flush_error),
-};
-
-static const struct mlx5_ib_counter roce_accl_cnts[] = {
- INIT_Q_COUNTER(roce_adp_retrans),
- INIT_Q_COUNTER(roce_adp_retrans_to),
- INIT_Q_COUNTER(roce_slow_restart),
- INIT_Q_COUNTER(roce_slow_restart_cnps),
- INIT_Q_COUNTER(roce_slow_restart_trans),
-};
-
-#define INIT_EXT_PPCNT_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(ppcnt_reg, \
- counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
-
-static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
- INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
-};
-
-static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
-{
- return MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
- MLX5_ESWITCH_OFFLOADS;
-}
-
-static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
- int num_cnt_ports;
- int i;
-
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
-
- for (i = 0; i < num_cnt_ports; i++) {
- if (dev->port[i].cnts.set_id) {
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
- dev->port[i].cnts.set_id);
- mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
- }
- kfree(dev->port[i].cnts.names);
- kfree(dev->port[i].cnts.offsets);
- }
-}
-
-static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_counters *cnts)
-{
- u32 num_counters;
-
- num_counters = ARRAY_SIZE(basic_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl))
- num_counters += ARRAY_SIZE(roce_accl_cnts);
-
- cnts->num_q_counters = num_counters;
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
- num_counters += ARRAY_SIZE(cong_cnts);
- }
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
- num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
- }
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
- if (!cnts->names)
- return -ENOMEM;
-
- cnts->offsets = kcalloc(num_counters,
- sizeof(cnts->offsets), GFP_KERNEL);
- if (!cnts->offsets)
- goto err_names;
-
- return 0;
-
-err_names:
- kfree(cnts->names);
- cnts->names = NULL;
- return -ENOMEM;
-}
-
-static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
-{
- int i;
- int j = 0;
-
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
- names[j] = basic_q_cnts[i].name;
- offsets[j] = basic_q_cnts[i].offset;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
- names[j] = out_of_seq_q_cnts[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
- names[j] = retrans_q_cnts[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
- names[j] = extended_err_cnts[i].name;
- offsets[j] = extended_err_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
- for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
- names[j] = roce_accl_cnts[i].name;
- offsets[j] = roce_accl_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
- names[j] = cong_cnts[i].name;
- offsets[j] = cong_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
- names[j] = ext_ppcnt_cnts[i].name;
- offsets[j] = ext_ppcnt_cnts[i].offset;
- }
- }
-}
-
-static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
- int num_cnt_ports;
- int err = 0;
- int i;
- bool is_shared;
-
- MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
- is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- for (i = 0; i < num_cnt_ports; i++) {
- err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
- if (err)
- goto err_alloc;
-
- mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
- dev->port[i].cnts.offsets);
-
- MLX5_SET(alloc_q_counter_in, in, uid,
- is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
-
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err) {
- mlx5_ib_warn(dev,
- "couldn't allocate queue counter for port %d, err %d\n",
- i + 1, err);
- goto err_alloc;
- }
-
- dev->port[i].cnts.set_id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
- return 0;
-
-err_alloc:
- mlx5_ib_dealloc_counters(dev);
- return err;
-}
-
-static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
- u8 port_num)
-{
- return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
- &dev->port[port_num].cnts;
-}
-
-/**
- * mlx5_ib_get_counters_id - Returns counters id to use for device+port
- * @dev: Pointer to mlx5 IB device
- * @port_num: Zero based port number
- *
- * mlx5_ib_get_counters_id() Returns counters set id to use for given
- * device port combination in switchdev and non switchdev mode of the
- * parent device.
- */
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
-{
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
-
- return cnts->set_id;
-}
-
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts;
- bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
-
- if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
- return NULL;
-
- cnts = get_counters(dev, port_num - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats,
- u16 set_id)
-{
- u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
- __be32 val;
- int ret, i;
-
- MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
- MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
- ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
- if (ret)
- return ret;
-
- for (i = 0; i < cnts->num_q_counters; i++) {
- val = *(__be32 *)((void *)out + cnts->offsets[i]);
- stats->value[i] = (u64)be32_to_cpu(val);
- }
-
- return 0;
-}
-
-static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats)
-{
- int offset = cnts->num_q_counters + cnts->num_cong_counters;
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- int ret, i;
- void *out;
-
- out = kvzalloc(sz, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
- if (ret)
- goto free;
-
- for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
- stats->value[i + offset] =
- be64_to_cpup((__be64 *)(out +
- cnts->offsets[i + offset]));
-free:
- kvfree(out);
- return ret;
-}
-
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
- struct mlx5_core_dev *mdev;
- int ret, num_counters;
- u8 mdev_port_num;
-
- if (!stats)
- return -EINVAL;
-
- num_counters = cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters;
-
- /* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
- if (ret)
- return ret;
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
- if (ret)
- return ret;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
- &mdev_port_num);
- if (!mdev) {
- /* If port is not affiliated yet, its in down state
- * which doesn't have any counters yet, so it would be
- * zero. So no need to read from the HCA.
- */
- goto done;
- }
- ret = mlx5_lag_query_cong_counters(dev->mdev,
- stats->value +
- cnts->num_q_counters,
- cnts->num_cong_counters,
- cnts->offsets +
- cnts->num_q_counters);
-
- mlx5_ib_put_native_port_mdev(dev, port_num);
- if (ret)
- return ret;
- }
-
-done:
- return num_counters;
-}
-
-static struct rdma_hw_stats *
-mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return mlx5_ib_query_q_counters(dev->mdev, cnts,
- counter->stats, counter->id);
-}
-
-static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-
- if (!counter->id)
- return 0;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
- return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
-}
-
-static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- int err;
-
- if (!counter->id) {
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-
- MLX5_SET(alloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_ALLOC_Q_COUNTER);
- MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err)
- return err;
- counter->id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
-
- err = mlx5_ib_qp_set_counter(qp, counter);
- if (err)
- goto fail_set_counter;
-
- return 0;
-
-fail_set_counter:
- mlx5_ib_counter_dealloc(counter);
- counter->id = 0;
-
- return err;
-}
-
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
-{
- return mlx5_ib_qp_set_counter(qp, NULL);
-}
-
static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
@@ -5802,23 +3369,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
}
-static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
- if (!dev->delay_drop.dir_debugfs)
- return;
- debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
- dev->delay_drop.dir_debugfs = NULL;
-}
-
-static void cancel_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- cancel_work_sync(&dev->delay_drop.delay_drop_work);
- delay_drop_debugfs_cleanup(dev);
-}
-
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -5858,40 +3408,6 @@ static const struct file_operations fops_delay_drop_timeout = {
.read = delay_drop_timeout_read,
};
-static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
-{
- struct dentry *root;
-
- if (!mlx5_debugfs_root)
- return;
-
- root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
- dev->delay_drop.dir_debugfs = root;
-
- debugfs_create_atomic_t("num_timeout_events", 0400, root,
- &dev->delay_drop.events_cnt);
- debugfs_create_atomic_t("num_rqs", 0400, root,
- &dev->delay_drop.rqs_cnt);
- debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
- &fops_delay_drop_timeout);
-}
-
-static void init_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- mutex_init(&dev->delay_drop.lock);
- dev->delay_drop.dev = dev;
- dev->delay_drop.activate = false;
- dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
- INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
- atomic_set(&dev->delay_drop.rqs_cnt, 0);
- atomic_set(&dev->delay_drop.events_cnt, 0);
-
- delay_drop_debugfs_init(dev);
-}
-
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
@@ -6385,90 +3901,32 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_query_context,
+ UVERBS_OBJECT_DEVICE,
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
+ dump_fill_mkey),
+ UA_MANDATORY));
+
static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_action),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
{}
};
-static int mlx5_ib_read_counters(struct ib_counters *counters,
- struct ib_counters_read_attr *read_attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- struct mlx5_read_counters_attr mread_attr = {};
- struct mlx5_ib_flow_counters_desc *desc;
- int ret, i;
-
- mutex_lock(&mcounters->mcntrs_mutex);
- if (mcounters->cntrs_max_index > read_attr->ncounters) {
- ret = -EINVAL;
- goto err_bound;
- }
-
- mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
- GFP_KERNEL);
- if (!mread_attr.out) {
- ret = -ENOMEM;
- goto err_bound;
- }
-
- mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
- mread_attr.flags = read_attr->flags;
- ret = mcounters->read_counters(counters->device, &mread_attr);
- if (ret)
- goto err_read;
-
- /* do the pass over the counters data array to assign according to the
- * descriptions and indexing pairs
- */
- desc = mcounters->counters_data;
- for (i = 0; i < mcounters->ncounters; i++)
- read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
-
-err_read:
- kfree(mread_attr.out);
-err_bound:
- mutex_unlock(&mcounters->mcntrs_mutex);
- return ret;
-}
-
-static int mlx5_ib_destroy_counters(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- counters_clear_description(counters);
- if (mcounters->hw_cntrs_hndl)
- mlx5_fc_destroy(to_mdev(counters->device)->mdev,
- mcounters->hw_cntrs_hndl);
-
- kfree(mcounters);
-
- return 0;
-}
-
-static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters;
-
- mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
- if (!mcounters)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&mcounters->mcntrs_mutex);
-
- return &mcounters->ibcntrs;
-}
-
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -6547,21 +4005,16 @@ err_mp:
return -ENOMEM;
}
-static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_enable_driver(struct ib_device *dev)
{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
+ struct mlx5_ib_dev *mdev = to_mdev(dev);
+ int ret;
- return 0;
-}
+ ret = mlx5_ib_test_wc(mdev);
+ mlx5_ib_dbg(mdev, "Write-Combining %s",
+ mdev->wc_support ? "supported" : "not supported");
-static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
-{
- kfree(dev->flow_db);
+ return ret;
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
@@ -6577,9 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.attach_mcast = mlx5_ib_mcg_attach,
.check_mr_status = mlx5_ib_check_mr_status,
.create_ah = mlx5_ib_create_ah,
- .create_counters = mlx5_ib_create_counters,
.create_cq = mlx5_ib_create_cq,
- .create_flow = mlx5_ib_create_flow,
.create_qp = mlx5_ib_create_qp,
.create_srq = mlx5_ib_create_srq,
.dealloc_pd = mlx5_ib_dealloc_pd,
@@ -6587,10 +4038,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.del_gid = mlx5_ib_del_gid,
.dereg_mr = mlx5_ib_dereg_mr,
.destroy_ah = mlx5_ib_destroy_ah,
- .destroy_counters = mlx5_ib_destroy_counters,
.destroy_cq = mlx5_ib_destroy_cq,
- .destroy_flow = mlx5_ib_destroy_flow,
- .destroy_flow_action = mlx5_ib_destroy_flow_action,
.destroy_qp = mlx5_ib_destroy_qp,
.destroy_srq = mlx5_ib_destroy_srq,
.detach_mcast = mlx5_ib_mcg_detach,
@@ -6598,8 +4046,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.enable_driver = mlx5_ib_enable_driver,
- .fill_res_entry = mlx5_ib_fill_res_entry,
- .fill_stat_entry = mlx5_ib_fill_stat_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
@@ -6623,24 +4069,20 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.query_pkey = mlx5_ib_query_pkey,
.query_qp = mlx5_ib_query_qp,
.query_srq = mlx5_ib_query_srq,
- .read_counters = mlx5_ib_read_counters,
+ .query_ucontext = mlx5_ib_query_ucontext,
.reg_user_mr = mlx5_ib_reg_user_mr,
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
};
-static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
- .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
- .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
.rdma_netdev_get_params = mlx5_ib_rn_get_params,
};
@@ -6661,6 +4103,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
.alloc_xrcd = mlx5_ib_alloc_xrcd,
.dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
};
static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
@@ -6769,9 +4213,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6829,65 +4270,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
.modify_wq = mlx5_ib_modify_wq,
};
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
-{
- u8 port_num;
-
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
-
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- /* Register only for native ports */
- return mlx5_add_netdev_notifier(dev, port_num);
-}
-
-static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- mlx5_remove_netdev_notifier(dev, port_num);
-}
-
-static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
- int err = 0;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
- if (ll == IB_LINK_LAYER_ETHERNET)
- err = mlx5_ib_stage_common_roce_init(dev);
-
- return err;
-}
-
-static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_stage_common_roce_cleanup(dev);
-}
-
-static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num = 0;
int err;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_ib_stage_common_roce_init(dev);
- if (err)
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ /* Register only for native ports */
+ err = mlx5_add_netdev_notifier(dev, port_num);
+ if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev))
+ /*
+ * We don't enable ETH interface for
+ * 1. IB representors
+ * 2. User disabled ROCE through devlink interface
+ */
return err;
err = mlx5_enable_eth(dev);
@@ -6897,71 +4309,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
return 0;
cleanup:
- mlx5_ib_stage_common_roce_cleanup(dev);
-
+ mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
-static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_eth(dev);
- mlx5_ib_stage_common_roce_cleanup(dev);
- }
-}
-
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
-{
- return create_dev_resources(&dev->devr);
-}
+ if (!dev->is_rep)
+ mlx5_disable_eth(dev);
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
-{
- destroy_dev_resources(&dev->devr);
-}
-
-static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
-{
- return mlx5_ib_odp_init_one(dev);
-}
-
-static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_odp_cleanup_one(dev);
-}
-
-static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
- .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
- .get_hw_stats = mlx5_ib_get_hw_stats,
- .counter_bind_qp = mlx5_ib_counter_bind_qp,
- .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
- .counter_dealloc = mlx5_ib_counter_dealloc,
- .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
- .counter_update_stats = mlx5_ib_counter_update_stats,
-};
-
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
-
- return mlx5_ib_alloc_counters(dev);
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ mlx5_remove_netdev_notifier(dev, port_num);
}
-
- return 0;
-}
-
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
}
static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
@@ -7023,7 +4391,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- destroy_umrc_res(dev);
+ int err;
+
+ err = mlx5_mr_cache_cleanup(dev);
+ if (err)
+ mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+ if (dev->umrc.qp)
+ mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
+ if (dev->umrc.cq)
+ ib_free_cq(dev->umrc.cq);
+ if (dev->umrc.pd)
+ ib_dealloc_pd(dev->umrc.pd);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -7031,21 +4410,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
ib_unregister_device(&dev->ib_dev);
}
+enum {
+ MAX_UMR_WR = 128,
+};
+
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- return create_umr_res(dev);
+ struct ib_qp_init_attr *init_attr = NULL;
+ struct ib_qp_attr *attr = NULL;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ int ret;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto error_0;
+ }
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ ret = PTR_ERR(pd);
+ goto error_0;
+ }
+
+ cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(cq)) {
+ mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+ ret = PTR_ERR(cq);
+ goto error_2;
+ }
+
+ init_attr->send_cq = cq;
+ init_attr->recv_cq = cq;
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->cap.max_send_wr = MAX_UMR_WR;
+ init_attr->cap.max_send_sge = 1;
+ init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
+ init_attr->port_num = 1;
+ qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+ if (IS_ERR(qp)) {
+ mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+ ret = PTR_ERR(qp);
+ goto error_3;
+ }
+ qp->device = &dev->ib_dev;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = MLX5_IB_QPT_REG_UMR;
+ qp->send_cq = init_attr->send_cq;
+ qp->recv_cq = init_attr->recv_cq;
+
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_PORT, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTR;
+ attr->path_mtu = IB_MTU_256;
+
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTS;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+ goto error_4;
+ }
+
+ dev->umrc.qp = qp;
+ dev->umrc.cq = cq;
+ dev->umrc.pd = pd;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ ret = mlx5_mr_cache_init(dev);
+ if (ret) {
+ mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+ goto error_4;
+ }
+
+ kfree(attr);
+ kfree(init_attr);
+
+ return 0;
+
+error_4:
+ mlx5_ib_destroy_qp(qp, NULL);
+ dev->umrc.qp = NULL;
+
+error_3:
+ ib_free_cq(cq);
+ dev->umrc.cq = NULL;
+
+error_2:
+ ib_dealloc_pd(pd);
+ dev->umrc.pd = NULL;
+
+error_0:
+ kfree(attr);
+ kfree(init_attr);
+ return ret;
}
static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
{
- init_delay_drop(dev);
+ struct dentry *root;
+
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return 0;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
+ dev->delay_drop.dir_debugfs = root;
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
return 0;
}
static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
{
- cancel_delay_drop(dev);
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ if (!dev->delay_drop.dir_debugfs)
+ return;
+
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
}
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
@@ -7060,38 +4580,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}
-static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
-{
- int uid;
-
- uid = mlx5_ib_devx_create(dev, false);
- if (uid > 0) {
- dev->devx_whitelist_uid = uid;
- mlx5_ib_devx_init_event_table(dev);
- }
-
- return 0;
-}
-static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
-{
- if (dev->devx_whitelist_uid) {
- mlx5_ib_devx_cleanup_event_table(dev);
- mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
- }
-}
-
-int mlx5_ib_enable_driver(struct ib_device *dev)
-{
- struct mlx5_ib_dev *mdev = to_mdev(dev);
- int ret;
-
- ret = mlx5_ib_test_wc(mdev);
- mlx5_ib_dbg(mdev, "Write-Combining %s",
- mdev->wc_support ? "supported" : "not supported");
-
- return ret;
-}
-
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
@@ -7139,9 +4627,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7149,8 +4637,8 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_ib_stage_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_roce_init,
- mlx5_ib_stage_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7158,17 +4646,17 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
- mlx5_ib_stage_odp_init,
- mlx5_ib_stage_odp_cleanup),
+ mlx5_ib_odp_init_one,
+ mlx5_ib_odp_cleanup_one),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7182,8 +4670,8 @@ static const struct mlx5_ib_profile pf_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -7193,15 +4681,18 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7209,8 +4700,8 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_ib_stage_raw_eth_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_raw_eth_roce_init,
- mlx5_ib_stage_raw_eth_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7218,14 +4709,14 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7239,14 +4730,17 @@ const struct mlx5_ib_profile raw_eth_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5dbe3eb0d9cb..5287fc868662 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#ifndef MLX5_IB_H
@@ -730,8 +703,8 @@ struct mlx5_ib_port_resources {
struct mlx5_ib_resources {
struct ib_cq *c0;
- struct ib_xrcd *x0;
- struct ib_xrcd *x1;
+ u32 xrcdn0;
+ u32 xrcdn1;
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
@@ -832,7 +805,7 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
- MLX5_IB_STAGE_FLOW_DB,
+ MLX5_IB_STAGE_FS,
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
@@ -850,7 +823,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
- MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_RESTRACK,
MLX5_IB_STAGE_MAX,
};
@@ -1078,11 +1051,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp)
return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
}
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
-{
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -1210,7 +1178,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
struct ib_pd *pd, struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
u32 max_num_sg,
u32 max_num_meta_sg);
@@ -1224,9 +1192,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad *in, struct ib_mad *out,
size_t *out_mad_size, u16 *out_mad_pkey_index);
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata);
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
@@ -1375,46 +1342,12 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
u8 *native_port_num);
void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
-int mlx5_ib_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
-int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
extern const struct uapi_definition mlx5_ib_qos_defs[];
+extern const struct uapi_definition mlx5_ib_std_types_defs[];
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
-void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
-void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
-struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
- struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act, u32 counter_id,
- void *cmd_in, int inlen, int dest_id, int dest_type);
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
-#else
-static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- bool is_user) { return -EOPNOTSUPP; }
-static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {}
-static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {}
-static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
- int *dest_type)
-{
- return false;
-}
-static inline void
-mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- return;
-};
-#endif
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -1423,15 +1356,6 @@ static inline void init_query_mad(struct ib_smp *mad)
mad->method = IB_MGMT_METHOD_GET;
}
-static inline u8 convert_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ;
-}
-
static inline int is_qp1(enum ib_qp_type qp_type)
{
return qp_type == MLX5_IB_QPT_HW_GSI;
@@ -1518,9 +1442,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
-int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
-
static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
bool do_modify_atomic, int access_flags)
{
@@ -1533,14 +1454,18 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
return false;
if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
- (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) ||
- MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)))
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return false;
+
+ if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
}
-int mlx5_ib_enable_driver(struct ib_device *dev);
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 44683073be0c..3e6f2f9c6655 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1961,7 +1961,7 @@ err_free:
}
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 7d2ec9ee5097..cfd7efab114e 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -601,6 +601,23 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
*/
synchronize_srcu(&dev->odp_srcu);
+ /*
+ * All work on the prefetch list must be completed, xa_erase() prevented
+ * new work from being created.
+ */
+ wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
+
+ /*
+ * At this point it is forbidden for any other thread to enter
+ * pagefault_mr() on this imr. It is already forbidden to call
+ * pagefault_mr() on an implicit child. Due to this additions to
+ * implicit_children are prevented.
+ */
+
+ /*
+ * Block destroy_unused_implicit_child_mr() from incrementing
+ * num_deferred_work.
+ */
xa_lock(&imr->implicit_children);
xa_for_each (&imr->implicit_children, idx, mtt) {
__xa_erase(&imr->implicit_children, idx);
@@ -609,9 +626,8 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
xa_unlock(&imr->implicit_children);
/*
- * num_deferred_work can only be incremented inside the odp_srcu, or
- * under xa_lock while the child is in the xarray. Thus at this point
- * it is only decreasing, and all work holding it is now on the wq.
+ * Wait for any concurrent destroy_unused_implicit_child_mr() to
+ * complete.
*/
wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));
@@ -800,6 +816,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ lockdep_assert_held(&mr->dev->odp_srcu);
if (unlikely(io_virt < mr->mmkey.iova))
return -EFAULT;
@@ -913,11 +930,6 @@ next_mr:
if (ret < 0)
goto srcu_unlock;
- /*
- * When prefetching a page, page fault is generated
- * in order to bring the page to the main memory.
- * In the current flow, page faults are being counted.
- */
mlx5_update_odp_stats(mr, faults, ret);
npages += ret;
@@ -1754,13 +1766,26 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
{
struct prefetch_mr_work *work =
container_of(w, struct prefetch_mr_work, work);
+ struct mlx5_ib_dev *dev;
u32 bytes_mapped = 0;
+ int srcu_key;
+ int ret;
u32 i;
- for (i = 0; i < work->num_sge; ++i)
- pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
- work->frags[i].length, &bytes_mapped,
- work->pf_flags);
+ /* We rely on IB/core that work is executed if we have num_sge != 0 only. */
+ WARN_ON(!work->num_sge);
+ dev = work->frags[0].mr->dev;
+ /* SRCU should be held when calling to mlx5_odp_populate_xlt() */
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
+ for (i = 0; i < work->num_sge; ++i) {
+ ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
+ work->frags[i].length, &bytes_mapped,
+ work->pf_flags);
+ if (ret <= 0)
+ continue;
+ mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
+ }
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
destroy_prefetch_work(work);
}
@@ -1781,9 +1806,7 @@ static bool init_prefetch_work(struct ib_pd *pd,
work->frags[i].mr =
get_prefetchable_mr(pd, advice, sg_list[i].lkey);
if (!work->frags[i].mr) {
- work->num_sge = i - 1;
- if (i)
- destroy_prefetch_work(work);
+ work->num_sge = i;
return false;
}
@@ -1818,6 +1841,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
&bytes_mapped, pf_flags);
if (ret < 0)
goto out;
+ mlx5_update_odp_stats(mr, prefetch, ret);
}
ret = 0;
@@ -1849,6 +1873,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
srcu_key = srcu_read_lock(&dev->odp_srcu);
if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
srcu_read_unlock(&dev->odp_srcu, srcu_key);
+ destroy_prefetch_work(work);
return -EINVAL;
}
queue_work(system_unbound_wq, &work->work);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 81bf6b975e0e..59fce5fac7a3 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -38,6 +38,7 @@
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
+#include "counters.h"
#include "cmd.h"
#include "qp.h"
#include "wr.h"
@@ -1766,15 +1767,14 @@ err:
}
static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
struct ib_qp_init_attr *init_attr,
- struct mlx5_ib_create_qp *ucmd,
void *qpc)
{
int scqe_sz;
bool allow_scat_cqe = false;
- if (ucmd)
- allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
+ allow_scat_cqe = qp->flags_en & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
return;
@@ -1853,8 +1853,6 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 *in;
int err;
- mutex_init(&qp->mutex);
-
if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
@@ -1862,7 +1860,7 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (!in)
return -ENOMEM;
- if (MLX5_CAP_GEN(mdev, ece_support))
+ if (MLX5_CAP_GEN(mdev, ece_support) && ucmd)
MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
@@ -1938,7 +1936,6 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 *in;
int err;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -2012,7 +2009,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
(qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
- configure_requester_scat_cqe(dev, init_attr, ucmd, qpc);
+ configure_requester_scat_cqe(dev, qp, init_attr, qpc);
if (qp->rq.wqe_cnt) {
MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
@@ -2035,15 +2032,15 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
switch (init_attr->qp_type) {
case IB_QPT_XRC_INI:
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
break;
default:
if (init_attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
}
}
@@ -2129,7 +2126,6 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 *in;
int err;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -2183,11 +2179,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, no_sq, 1);
if (attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(devr->s1)->msrq.srqn);
}
@@ -2341,18 +2337,18 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
unsigned long flags;
int err;
- if (qp->ibqp.rwq_ind_tbl) {
+ if (qp->is_rss) {
destroy_rss_raw_qp_tir(dev, qp);
return;
}
- base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ base = (qp->type == IB_QPT_RAW_PACKET ||
qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
- if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
+ if (qp->type != IB_QPT_RAW_PACKET &&
!(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp, NULL);
@@ -2368,8 +2364,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
base->mqp.qpn);
}
- get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
- &send_cq, &recv_cq);
+ get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq,
+ &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
@@ -2391,7 +2387,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ if (qp->type == IB_QPT_RAW_PACKET ||
qp->flags & IB_QP_CREATE_SOURCE_QPN) {
destroy_raw_packet_qp(dev, qp);
} else {
@@ -2543,13 +2539,18 @@ static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
return;
}
- if (flag == MLX5_QP_FLAG_SCATTER_CQE) {
+ switch (flag) {
+ case MLX5_QP_FLAG_SCATTER_CQE:
+ case MLX5_QP_FLAG_ALLOW_SCATTER_CQE:
/*
- * We don't return error if this flag was provided,
- * and mlx5 doesn't have right capability.
- */
- *flags &= ~MLX5_QP_FLAG_SCATTER_CQE;
+ * We don't return error if these flags were provided,
+ * and mlx5 doesn't have right capability.
+ */
+ *flags &= ~(MLX5_QP_FLAG_SCATTER_CQE |
+ MLX5_QP_FLAG_ALLOW_SCATTER_CQE);
return;
+ default:
+ break;
}
mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
}
@@ -2589,6 +2590,8 @@ static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE,
+ MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
if (qp->type == IB_QPT_RAW_PACKET) {
cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
@@ -2668,6 +2671,13 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl)
return (create_flags) ? -EINVAL : 0;
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_NETIF_QP,
+ mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_BYPASS),
+ qp);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_INTEGRITY_EN,
+ MLX5_CAP_GEN(mdev, sho), qp);
process_create_flag(dev, &create_flags,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX5_CAP_GEN(mdev, block_lb_mc), qp);
@@ -2873,7 +2883,6 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
static int check_ucmd_data(struct mlx5_ib_dev *dev,
struct mlx5_create_qp_params *params)
{
- struct ib_qp_init_attr *attr = params->attr;
struct ib_udata *udata = params->udata;
size_t size, last;
int ret;
@@ -2885,14 +2894,7 @@ static int check_ucmd_data(struct mlx5_ib_dev *dev,
*/
last = sizeof(struct mlx5_ib_create_qp_rss);
else
- /* IB_QPT_RAW_PACKET doesn't have ECE data */
- switch (attr->qp_type) {
- case IB_QPT_RAW_PACKET:
- last = offsetof(struct mlx5_ib_create_qp, ece_options);
- break;
- default:
- last = offsetof(struct mlx5_ib_create_qp, reserved);
- }
+ last = offsetof(struct mlx5_ib_create_qp, reserved);
if (udata->inlen <= last)
return 0;
@@ -2907,7 +2909,7 @@ static int check_ucmd_data(struct mlx5_ib_dev *dev,
if (!ret)
mlx5_ib_dbg(
dev,
- "udata is not cleared, inlen = %lu, ucmd = %lu, last = %lu, size = %lu\n",
+ "udata is not cleared, inlen = %zu, ucmd = %zu, last = %zu, size = %zu\n",
udata->inlen, params->ucmd_size, last, size);
return ret ? 0 : -EINVAL;
}
@@ -2964,6 +2966,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
goto free_ucmd;
}
+ mutex_init(&qp->mutex);
qp->type = type;
if (udata) {
err = process_vendor_flags(dev, qp, params.ucmd, attr);
@@ -3002,10 +3005,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr,
return &qp->ibqp;
destroy_qp:
- if (qp->type == MLX5_IB_QPT_DCT)
+ if (qp->type == MLX5_IB_QPT_DCT) {
mlx5_ib_destroy_dct(qp);
- else
+ } else {
+ /*
+ * These lines below are temp solution till QP allocation
+ * will be moved to be under IB/core responsiblity.
+ */
+ qp->ibqp.send_cq = attr->send_cq;
+ qp->ibqp.recv_cq = attr->recv_cq;
+ qp->ibqp.pd = pd;
destroy_qp_common(dev, qp, udata);
+ }
+
qp = NULL;
free_qp:
kfree(qp);
@@ -3543,7 +3555,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
switch (raw_qp_param->operation) {
case MLX5_CMD_OP_RST2INIT_QP:
rq_state = MLX5_RQC_STATE_RDY;
- sq_state = MLX5_SQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RST;
break;
case MLX5_CMD_OP_2ERR_QP:
rq_state = MLX5_RQC_STATE_ERR;
@@ -3555,13 +3567,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
break;
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
- if (raw_qp_param->set_mask ==
- MLX5_RAW_QP_RATE_LIMIT) {
- modify_rq = 0;
- sq_state = sq->state;
- } else {
- return raw_qp_param->set_mask ? -EINVAL : 0;
- }
+ if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
+ return -EINVAL;
+
+ modify_rq = 0;
+ sq_state = MLX5_SQC_STATE_RDY;
break;
case MLX5_CMD_OP_INIT2INIT_QP:
case MLX5_CMD_OP_INIT2RTR_QP:
@@ -4103,9 +4113,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
enum ib_qp_state cur_state, new_state;
- int err = 0;
int required = IB_QP_STATE;
void *dctc;
+ int err;
if (!(attr_mask & IB_QP_STATE))
return -EINVAL;
@@ -4162,8 +4172,6 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (udata->outlen < min_resp_len)
return -EINVAL;
- resp.response_length = min_resp_len;
-
/*
* If we don't have enough space for the ECE options,
* simply indicate it with resp.response_length.
@@ -4199,11 +4207,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
return -EINVAL;
}
- if (err)
- qp->state = IB_QPS_ERR;
- else
- qp->state = new_state;
- return err;
+
+ qp->state = new_state;
+ return 0;
}
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -4384,8 +4390,7 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
MLX5_GET(ads, path, src_addr_index),
MLX5_GET(ads, path, hop_limit),
MLX5_GET(ads, path, tclass));
- memcpy(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip),
- MLX5_FLD_SZ_BYTES(ads, rgid_rip));
+ rdma_ah_set_dgid_raw(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip));
}
}
@@ -4442,7 +4447,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_RESET,
},
[MLX5_RQC_STATE_RDY] = {
- [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
@@ -4454,7 +4459,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_ERR,
},
[MLX5_RQ_STATE_NA] = {
- [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
@@ -4700,41 +4705,23 @@ out:
return err;
}
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata)
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_xrcd *xrcd;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
if (!MLX5_CAP_GEN(dev->mdev, xrc))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
- if (err) {
- kfree(xrcd);
- return ERR_PTR(-ENOMEM);
- }
+ return -EOPNOTSUPP;
- return &xrcd->ibxrcd;
+ return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- int err;
- err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
- if (err)
- mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
-
- kfree(xrcd);
- return 0;
+ mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index 82ea2b94dfa6..ba899df44c5b 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -43,4 +43,5 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index c19d91d6dce8..7c3968ef9cd1 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -346,6 +346,9 @@ static int get_ece_from_mbox(void *out, u16 opcode)
int ece = 0;
switch (opcode) {
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ ece = MLX5_GET(init2init_qp_out, out, ece);
+ break;
case MLX5_CMD_OP_INIT2RTR_QP:
ece = MLX5_GET(init2rtr_qp_out, out, ece);
break;
@@ -355,6 +358,9 @@ static int get_ece_from_mbox(void *out, u16 opcode)
case MLX5_CMD_OP_RTS2RTS_QP:
ece = MLX5_GET(rts2rts_qp_out, out, ece);
break;
+ case MLX5_CMD_OP_RST2INIT_QP:
+ ece = MLX5_GET(rst2init_qp_out, out, ece);
+ break;
default:
break;
}
@@ -406,6 +412,7 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
return -ENOMEM;
MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
opt_param_mask, qpc, uid);
+ MLX5_SET(rst2init_qp_in, mbox->in, ece, ece);
break;
case MLX5_CMD_OP_INIT2RTR_QP:
if (MBOX_ALLOC(mbox, init2rtr_qp))
@@ -439,6 +446,7 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
return -ENOMEM;
MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
opt_param_mask, qpc, uid);
+ MLX5_SET(init2init_qp_in, mbox->in, ece, ece);
break;
default:
return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index 8f6c04f12531..887270dd3ce2 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -1,17 +1,85 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved.
*/
#include <uapi/rdma/rdma_netlink.h>
+#include <linux/mlx5/rsc_dump.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/restrack.h>
#include "mlx5_ib.h"
+#include "restrack.h"
-static int fill_stat_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+#define MAX_DUMP_SIZE 1024
+
+static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type,
+ int index, void *data, int *data_len)
+{
+ struct mlx5_core_dev *mdev = dev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
+ int offset = 0;
+ int err = 0;
+ int cmd_err;
+ int size;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ key.size = PAGE_SIZE;
+ key.rsc = type;
+ key.index1 = index;
+ key.num_of_obj1 = 1;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, &key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+ memcpy(data + offset, page_address(page), size);
+ offset += size;
+ } while (cmd_err > 0);
+ *data_len = offset;
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+free_page:
+ __free_page(page);
+ return err;
+}
+
+static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev,
+ enum mlx5_sgmt_type type, u32 key)
+{
+ int len = 0;
+ void *data;
+ int err;
+
+ data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = dump_rsc(dev->mdev, type, key, data, &len);
+ if (err)
+ goto out;
+
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+out:
+ kfree(data);
+ return err;
+}
+
+static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct nlattr *table_attr;
@@ -31,6 +99,9 @@ static int fill_stat_mr_entry(struct sk_buff *msg,
msg, "page_invalidations",
atomic64_read(&mr->odp_stats.invalidations)))
goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
+ atomic64_read(&mr->odp_stats.prefetch)))
+ goto err_table;
nla_nest_end(msg, table_attr);
return 0;
@@ -41,10 +112,16 @@ err:
return -EMSGSIZE;
}
-static int fill_res_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+ return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ mlx5_mkey_to_idx(mr->mmkey.key));
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct nlattr *table_attr;
@@ -71,20 +148,32 @@ err:
return -EMSGSIZE;
}
-int mlx5_ib_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq)
{
- if (res->type == RDMA_RESTRACK_MR)
- return fill_res_mr_entry(msg, res);
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
- return 0;
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn);
}
-int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
{
- if (res->type == RDMA_RESTRACK_MR)
- return fill_stat_mr_entry(msg, res);
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP,
+ ibqp->qp_num);
+}
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry_raw = fill_res_cq_entry_raw,
+ .fill_res_mr_entry = fill_res_mr_entry,
+ .fill_res_mr_entry_raw = fill_res_mr_entry_raw,
+ .fill_res_qp_entry_raw = fill_res_qp_entry_raw,
+ .fill_stat_mr_entry = fill_stat_mr_entry,
+};
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &restrack_ops);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h
new file mode 100644
index 000000000000..e8d81270f1b6
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_RESTRACK_H
+#define _MLX5_IB_RESTRACK_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev);
+
+#endif /* _MLX5_IB_RESTRACK_H */
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6d1ff13d2283..7e10cbcb6d5c 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -274,10 +274,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
- if (init_attr->srq_type == IB_SRQT_XRC)
+ if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd)
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
else
- in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
+ in.xrcd = dev->devr.xrcdn0;
if (init_attr->srq_type == IB_SRQT_TM) {
in.tm_log_list_size =
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 6f5eadc4d183..37aaacebd3f2 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -83,11 +83,11 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
struct mlx5_srq_table *table = &dev->srq_table;
struct mlx5_core_srq *srq;
- xa_lock(&table->array);
+ xa_lock_irq(&table->array);
srq = xa_load(&table->array, srqn);
if (srq)
refcount_inc(&srq->common.refcount);
- xa_unlock(&table->array);
+ xa_unlock_irq(&table->array);
return srq;
}
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
new file mode 100644
index 000000000000..16145fda68d0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE);
+ struct mlx5_ib_pd *mpd = to_mpd(pd);
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ &mpd->pdn, sizeof(mpd->pdn));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_PD_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_pd,
+ UVERBS_OBJECT_PD,
+ &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY));
+
+const struct uapi_definition mlx5_ib_std_types_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_PD,
+ &mlx5_ib_pd),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index 2c6df1c43b55..43880973a512 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -263,7 +263,9 @@ static __be64 get_umr_update_translation_mask(void)
return cpu_to_be64(result);
}
-static __be64 get_umr_update_access_mask(int atomic)
+static __be64 get_umr_update_access_mask(int atomic,
+ int relaxed_ordering_write,
+ int relaxed_ordering_read)
{
u64 result;
@@ -275,6 +277,12 @@ static __be64 get_umr_update_access_mask(int atomic)
if (atomic)
result |= MLX5_MKEY_MASK_A;
+ if (relaxed_ordering_write)
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
+
+ if (relaxed_ordering_read)
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
+
return cpu_to_be64(result);
}
@@ -289,17 +297,28 @@ static __be64 get_umr_update_pd_mask(void)
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
- if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
- (mask & MLX5_MKEY_MASK_A &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
+ if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ return -EPERM;
+
return 0;
}
static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
struct mlx5_wqe_umr_ctrl_seg *umr,
- const struct ib_send_wr *wr, int atomic)
+ const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -325,7 +344,10 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
umr->mkey_mask |= get_umr_update_translation_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
- umr->mkey_mask |= get_umr_update_access_mask(atomic);
+ umr->mkey_mask |= get_umr_update_access_mask(
+ !!(MLX5_CAP_GEN(dev->mdev, atomic)),
+ !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
+ !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
umr->mkey_mask |= get_umr_update_pd_mask();
}
if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
@@ -383,20 +405,31 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- seg->status = MLX5_MKEY_STATUS_FREE;
+ MLX5_SET(mkc, seg, free, 1);
+
+ MLX5_SET(mkc, seg, a,
+ !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, seg, rw,
+ !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, seg, lr, 1);
+ MLX5_SET(mkc, seg, relaxed_ordering_write,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+ MLX5_SET(mkc, seg, relaxed_ordering_read,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
- seg->flags = convert_access(umrwr->access_flags);
if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
!umrwr->length)
- seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
+ MLX5_SET(mkc, seg, length64, 1);
- seg->start_addr = cpu_to_be64(umrwr->virt_addr);
- seg->len = cpu_to_be64(umrwr->length);
- seg->log2_page_size = umrwr->page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
- mlx5_mkey_variant(umrwr->mkey));
+ MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
+ MLX5_SET64(mkc, seg, len, umrwr->length);
+ MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
+ MLX5_SET(mkc, seg, qpn, 0xffffff);
+ MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
}
static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
@@ -1224,8 +1257,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
- err = set_reg_umr_segment(dev, *seg, wr,
- !!(MLX5_CAP_GEN(dev->mdev, atomic)));
+ err = set_reg_umr_segment(dev, *seg, wr);
if (unlikely(err))
goto out;
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
@@ -1249,7 +1281,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
void *cur_edge;
- int uninitialized_var(size);
+ int size;
unsigned long flags;
unsigned int idx;
int err = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index d04c245359eb..c6e95d0d760a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1639,8 +1639,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
* without initializing f0 and size0, and they are in fact
* never used uninitialized.
*/
- int uninitialized_var(size0);
- u32 uninitialized_var(f0);
+ int size0;
+ u32 f0;
int ind;
u8 op0 = 0;
@@ -1835,7 +1835,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
* without initializing size0, and it is in fact never used
* uninitialized.
*/
- int uninitialized_var(size0);
+ int size0;
int ind;
void *wqe;
void *prev_wqe;
@@ -1943,8 +1943,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
* without initializing f0 and size0, and they are in fact
* never used uninitialized.
*/
- int uninitialized_var(size0);
- u32 uninitialized_var(f0);
+ int size0;
+ u32 f0;
int ind;
u8 op0 = 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index d11c74390a12..6cdbec13756a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -2901,7 +2901,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
}
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
int status;
struct ocrdma_mr *mr;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 3a5010881be5..df8e3b923a44 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -101,7 +101,7 @@ struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *);
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ccaedfd53e49..d85f992bac29 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -110,7 +110,6 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = 1;
immutable->gid_tbl_len = 1;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
immutable->max_mad_size = 0;
@@ -179,6 +178,7 @@ static int qedr_iw_register_device(struct qedr_dev *dev)
static const struct ib_device_ops qedr_roce_dev_ops = {
.get_port_immutable = qedr_roce_port_immutable,
+ .query_pkey = qedr_query_pkey,
};
static void qedr_roce_register_device(struct qedr_dev *dev)
@@ -221,7 +221,6 @@ static const struct ib_device_ops qedr_dev_ops = {
.post_srq_recv = qedr_post_srq_recv,
.process_mad = qedr_process_mad,
.query_device = qedr_query_device,
- .query_pkey = qedr_query_pkey,
.query_port = qedr_query_port,
.query_qp = qedr_query_qp,
.query_srq = qedr_query_srq,
@@ -346,9 +345,14 @@ static void qedr_free_resources(struct qedr_dev *dev)
static int qedr_alloc_resources(struct qedr_dev *dev)
{
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .elem_size = sizeof(struct regpair *),
+ };
struct qedr_cnq *cnq;
__le16 *cons_pi;
- u16 n_entries;
int i, rc;
dev->sgid_tbl = kcalloc(QEDR_MAX_SGID, sizeof(union ib_gid),
@@ -382,7 +386,9 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
dev->sb_start = dev->ops->rdma_get_start_sb(dev->cdev);
/* Allocate CNQ PBLs */
- n_entries = min_t(u32, QED_RDMA_MAX_CNQ_SIZE, QEDR_ROCE_MAX_CNQ_SIZE);
+ params.num_elems = min_t(u32, QED_RDMA_MAX_CNQ_SIZE,
+ QEDR_ROCE_MAX_CNQ_SIZE);
+
for (i = 0; i < dev->num_cnq; i++) {
cnq = &dev->cnq_array[i];
@@ -391,13 +397,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
if (rc)
goto err3;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- n_entries,
- sizeof(struct regpair *),
- &cnq->pbl, NULL);
+ rc = dev->ops->common->chain_alloc(dev->cdev, &cnq->pbl,
+ &params);
if (rc)
goto err4;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index fdf90ecb2699..460292179b32 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -235,6 +235,7 @@ struct qedr_ucontext {
u32 dpi_size;
u16 dpi;
bool db_rec;
+ u8 edpm_mode;
};
union db_prod32 {
@@ -344,10 +345,10 @@ struct qedr_srq_hwq_info {
u32 wqe_prod;
u32 sge_prod;
u32 wr_prod_cnt;
- u32 wr_cons_cnt;
+ atomic_t wr_cons_cnt;
u32 num_elems;
- u32 *virt_prod_pair_addr;
+ struct rdma_srq_producers *virt_prod_pair_addr;
dma_addr_t phy_prod_pair_addr;
};
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 792eecd206b6..97fc7dd353b0 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -150,8 +150,17 @@ qedr_iw_issue_event(void *context,
if (params->cm_info) {
event.ird = params->cm_info->ird;
event.ord = params->cm_info->ord;
- event.private_data_len = params->cm_info->private_data_len;
- event.private_data = (void *)params->cm_info->private_data;
+ /* Only connect_request and reply have valid private data
+ * the rest of the events this may be left overs from
+ * connection establishment. CONNECT_REQUEST is issued via
+ * qedr_iw_mpa_request
+ */
+ if (event_type == IW_CM_EVENT_CONNECT_REPLY) {
+ event.private_data_len =
+ params->cm_info->private_data_len;
+ event.private_data =
+ (void *)params->cm_info->private_data;
+ }
}
if (ep->cm_id)
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 9b9e80266367..4ce4e2eef6cc 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -239,7 +239,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
attr->ip_gids = true;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
attr->gid_tbl_len = 1;
- attr->pkey_tbl_len = 1;
} else {
attr->gid_tbl_len = QEDR_MAX_SGID;
attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
@@ -275,7 +274,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
DP_ERR(dev, "Problem copying data from user space\n");
return -EFAULT;
}
-
+ ctx->edpm_mode = !!(ureq.context_flags &
+ QEDR_ALLOC_UCTX_EDPM_MODE);
ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
}
@@ -316,11 +316,15 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
else
uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
- QEDR_DPM_TYPE_ROCE_LEGACY;
+ QEDR_DPM_TYPE_ROCE_LEGACY |
+ QEDR_DPM_TYPE_ROCE_EDPM_MODE;
- uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
- uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
- uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
+ if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
+ uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
+ uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
+ uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
+ uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
+ }
uresp.wids_enabled = 1;
uresp.wid_count = oparams.wid_count;
@@ -891,6 +895,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
udata, struct qedr_ucontext, ibucontext);
struct qed_rdma_destroy_cq_out_params destroy_oparams;
struct qed_rdma_destroy_cq_in_params destroy_iparams;
+ struct qed_chain_init_params chain_params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ .elem_size = sizeof(union rdma_cqe),
+ };
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_create_cq_in_params params;
struct qedr_create_cq_ureq ureq = {};
@@ -917,6 +927,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
chain_entries = qedr_align_cq_entries(entries);
chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+ chain_params.num_elems = chain_entries;
/* calc db offset. user will add DPI base, kernel will add db addr */
db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
@@ -951,13 +962,8 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
} else {
cq->cq_type = QEDR_CQ_TYPE_KERNEL;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- chain_entries,
- sizeof(union rdma_cqe),
- &cq->pbl, NULL);
+ rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
+ &chain_params);
if (rc)
goto err0;
@@ -1446,6 +1452,12 @@ static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
struct ib_srq_init_attr *init_attr)
{
struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ .elem_size = QEDR_SRQ_WQE_ELEM_SIZE,
+ };
dma_addr_t phy_prod_pair_addr;
u32 num_elems;
void *va;
@@ -1464,13 +1476,9 @@ static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
hw_srq->virt_prod_pair_addr = va;
num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- num_elems,
- QEDR_SRQ_WQE_ELEM_SIZE,
- &hw_srq->pbl, NULL);
+ params.num_elems = num_elems;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
if (rc)
goto err0;
@@ -1750,7 +1758,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
struct qed_rdma_create_qp_out_params out_params;
struct qedr_pd *pd = get_qedr_pd(ibpd);
struct qedr_create_qp_uresp uresp;
- struct qedr_ucontext *ctx = NULL;
+ struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
struct qedr_create_qp_ureq ureq;
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
int rc = -EINVAL;
@@ -1788,6 +1796,9 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
}
+ if (ctx)
+ SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
+
qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
&in_params, &out_params);
@@ -1901,29 +1912,28 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_elems, u32 n_rq_elems)
{
struct qed_rdma_create_qp_out_params out_params;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ };
int rc;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_sq_elems,
- QEDR_SQE_ELEMENT_SIZE,
- &qp->sq.pbl, NULL);
+ params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
+ params.num_elems = n_sq_elems;
+ params.elem_size = QEDR_SQE_ELEMENT_SIZE;
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
if (rc)
return rc;
in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_rq_elems,
- QEDR_RQE_ELEMENT_SIZE,
- &qp->rq.pbl, NULL);
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
+ params.num_elems = n_rq_elems;
+ params.elem_size = QEDR_RQE_ELEMENT_SIZE;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
if (rc)
return rc;
@@ -1949,14 +1959,19 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_elems, u32 n_rq_elems)
{
struct qed_rdma_create_qp_out_params out_params;
- struct qed_chain_ext_pbl ext_pbl;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ };
int rc;
in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
QEDR_SQE_ELEMENT_SIZE,
+ QED_CHAIN_PAGE_SIZE,
QED_CHAIN_MODE_PBL);
in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
QEDR_RQE_ELEMENT_SIZE,
+ QED_CHAIN_PAGE_SIZE,
QED_CHAIN_MODE_PBL);
qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
@@ -1966,31 +1981,24 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
return -EINVAL;
/* Now we allocate the chain */
- ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
- ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_sq_elems,
- QEDR_SQE_ELEMENT_SIZE,
- &qp->sq.pbl, &ext_pbl);
+ params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
+ params.num_elems = n_sq_elems;
+ params.elem_size = QEDR_SQE_ELEMENT_SIZE;
+ params.ext_pbl_virt = out_params.sq_pbl_virt;
+ params.ext_pbl_phys = out_params.sq_pbl_phys;
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
if (rc)
goto err;
- ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
- ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
-
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_rq_elems,
- QEDR_RQE_ELEMENT_SIZE,
- &qp->rq.pbl, &ext_pbl);
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
+ params.num_elems = n_rq_elems;
+ params.elem_size = QEDR_RQE_ELEMENT_SIZE;
+ params.ext_pbl_virt = out_params.rq_pbl_virt;
+ params.ext_pbl_phys = out_params.rq_pbl_phys;
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
if (rc)
goto err;
@@ -3003,7 +3011,7 @@ err0:
}
struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct qedr_mr *mr;
@@ -3686,7 +3694,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
* count and consumer count and subtract it from max
* work request supported so that we get elements left.
*/
- used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
+ used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
return hw_srq->max_wr - used;
}
@@ -3701,7 +3709,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
unsigned long flags;
int status = 0;
u32 num_sge;
- u32 offset;
spin_lock_irqsave(&srq->lock, flags);
@@ -3714,7 +3721,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (!qedr_srq_elem_left(hw_srq) ||
wr->num_sge > srq->hw_srq.max_sges) {
DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
- hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
+ hw_srq->wr_prod_cnt,
+ atomic_read(&hw_srq->wr_cons_cnt),
wr->num_sge, srq->hw_srq.max_sges);
status = -ENOMEM;
*bad_wr = wr;
@@ -3748,22 +3756,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
hw_srq->sge_prod++;
}
- /* Flush WQE and SGE information before
+ /* Update WQE and SGE information before
* updating producer.
*/
- wmb();
+ dma_wmb();
/* SRQ producer is 8 bytes. Need to update SGE producer index
* in first 4 bytes and need to update WQE producer in
* next 4 bytes.
*/
- *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
- offset = offsetof(struct rdma_srq_producers, wqe_prod);
- *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
- hw_srq->wqe_prod;
+ srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
+ /* Make sure sge producer is updated first */
+ dma_wmb();
+ srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
- /* Flush producer after updating it. */
- wmb();
wr = wr->next;
}
@@ -4182,7 +4188,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
} else {
__process_resp_one(dev, qp, cq, wc, resp, wr_id);
}
- srq->hw_srq.wr_cons_cnt++;
+ atomic_inc(&srq->hw_srq.wr_cons_cnt);
return 1;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 5e02387e068d..39dd6286ba39 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -84,7 +84,7 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
int qedr_post_send(struct ib_qp *, const struct ib_send_wr *,
const struct ib_send_wr **bad_wr);
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
index 7875883621f4..398c4c00b932 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.c
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -214,7 +214,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
if (!flow)
return ERR_PTR(-ENOMEM);
- tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
+ tlv = dma_alloc_coherent(&pdev->dev, tlv_size, &tlv_pa, GFP_ATOMIC);
if (!tlv) {
usnic_err("Failed to allocate memory\n");
status = -ENOMEM;
@@ -258,7 +258,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
out_free_tlv:
spin_unlock(&ufdev->lock);
- pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
+ dma_free_coherent(&pdev->dev, tlv_size, tlv, tlv_pa);
if (!status)
return flow;
out_free_flow:
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index b039f1f00e05..77a010e68208 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -202,7 +202,7 @@ err_umem:
* @return: ib_mr pointer on success, otherwise returns an errno.
*/
struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
struct pvrdma_user_mr *mr;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 267702226f10..699b20849a7e 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -406,7 +406,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_udata *udata);
int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,