diff options
Diffstat (limited to 'drivers/infiniband/hw')
108 files changed, 6424 insertions, 2364 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index a76e206704d4..bbfb86eb2d24 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev, | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_MEM_MGT_EXTENSIONS; - ib_attr->max_sge = dev_attr->max_qp_sges; + ib_attr->max_send_sge = dev_attr->max_qp_sges; + ib_attr->max_recv_sge = dev_attr->max_qp_sges; ib_attr->max_sge_rd = dev_attr->max_qp_sges; ib_attr->max_cq = dev_attr->max_cq; ib_attr->max_cqe = dev_attr->max_cq_wqes; @@ -243,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, port_attr->gid_tbl_len = dev_attr->max_sgid; port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + IB_PORT_VENDOR_CLASS_SUP; + port_attr->ip_gids = true; port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW; port_attr->bad_pkey_cntr = 0; @@ -364,8 +365,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context) return rc; } -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context) { int rc; u32 tbl_idx = 0; @@ -377,7 +377,7 @@ int bnxt_re_add_gid(const union ib_gid *gid, if ((attr->ndev) && is_vlan_dev(attr->ndev)) vlan_id = vlan_dev_vlan_id(attr->ndev); - rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid, + rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid, rdev->qplib_res.netdev->dev_addr, vlan_id, true, &tbl_idx); if (rc == -EALREADY) { @@ -673,8 +673,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, int rc; u8 nw_type; - struct ib_gid_attr sgid_attr; - if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); return ERR_PTR(-EINVAL); @@ -705,20 +703,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) grh->dgid.raw)) { - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr; - rc = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to query gid at index %d", - grh->sgid_index); - goto fail; - } - dev_put(sgid_attr.ndev); + sgid_attr = grh->sgid_attr; /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; @@ -1408,7 +1397,7 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd, } if (srq_init_attr->srq_type != IB_SRQT_BASIC) { - rc = -ENOTSUPP; + rc = -EOPNOTSUPP; goto exit; } @@ -1530,8 +1519,8 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) return 0; } -int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); @@ -1599,9 +1588,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; enum ib_qp_state curr_qp_state, new_qp_state; int rc, entries; - int status; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; unsigned int flags; u8 nw_type; @@ -1668,6 +1654,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, if (qp_attr_mask & IB_QP_AV) { const struct ib_global_route *grh = rdma_ah_read_grh(&qp_attr->ah_attr); + const struct ib_gid_attr *sgid_attr; qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID | CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL | @@ -1691,29 +1678,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.roce.dmac); - status = ib_get_cached_gid(&rdev->ibdev, 1, - grh->sgid_index, - &sgid, &sgid_attr); - if (!status) { - memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr, - ETH_ALEN); - dev_put(sgid_attr.ndev); - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, - &sgid); - switch (nw_type) { - case RDMA_NETWORK_IPV4: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; - break; - case RDMA_NETWORK_IPV6: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; - break; - default: - qp->qplib_qp.nw_type = - CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; - break; - } + sgid_attr = qp_attr->ah_attr.grh.sgid_attr; + memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr, + ETH_ALEN); + nw_type = rdma_gid_attr_network_type(sgid_attr); + switch (nw_type) { + case RDMA_NETWORK_IPV4: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; + break; + case RDMA_NETWORK_IPV6: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; + break; + default: + qp->qplib_qp.nw_type = + CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; + break; } } @@ -1895,19 +1876,17 @@ out: /* Routine for sending QP1 packets for RoCE V1 an V2 */ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe, int payload_size) { - struct ib_device *ibdev = &qp->rdev->ibdev; struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah); struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah; + const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr; struct bnxt_qplib_sge sge; - union ib_gid sgid; u8 nw_type; u16 ether_type; - struct ib_gid_attr sgid_attr; union ib_gid dgid; bool is_eth = false; bool is_vlan = false; @@ -1920,22 +1899,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); - rc = ib_get_cached_gid(ibdev, 1, - qplib_ah->host_sgid_index, &sgid, - &sgid_attr); - if (rc) { - dev_err(rdev_to_dev(qp->rdev), - "Failed to query gid at index %d", - qplib_ah->host_sgid_index); - return rc; - } - if (sgid_attr.ndev) { - if (is_vlan_dev(sgid_attr.ndev)) - vlan_id = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); - } + if (is_vlan_dev(sgid_attr->ndev)) + vlan_id = vlan_dev_vlan_id(sgid_attr->ndev); /* Get network header type for this GID */ - nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + nw_type = rdma_gid_attr_network_type(sgid_attr); switch (nw_type) { case RDMA_NETWORK_IPV4: nw_type = BNXT_RE_ROCEV2_IPV4_PACKET; @@ -1948,9 +1915,9 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, break; } memcpy(&dgid.raw, &qplib_ah->dgid, 16); - is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; + is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { - if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { ip_version = 4; ether_type = ETH_P_IP; } else { @@ -1983,9 +1950,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, } if (is_grh || (ip_version == 6)) { - memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid)); + memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw, + sizeof(sgid_attr->gid)); memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data, - sizeof(sgid)); + sizeof(sgid_attr->gid)); qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit; } @@ -1995,7 +1963,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, qp->qp1_hdr.ip4.frag_off = htons(IP_DF); qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit; - memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4); + memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4); memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4); qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr); } @@ -2080,7 +2048,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, * and the MAD datagram out to the provided SGE. */ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, - struct ib_recv_wr *wr, + const struct ib_recv_wr *wr, struct bnxt_qplib_swqe *wqe, int payload_size) { @@ -2125,7 +2093,7 @@ static int is_ud_qp(struct bnxt_re_qp *qp) } static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_ah *ah = NULL; @@ -2163,7 +2131,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, return 0; } -static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { @@ -2195,7 +2163,7 @@ static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { @@ -2222,7 +2190,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, +static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; @@ -2241,7 +2209,7 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, return 0; } -static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, +static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr); @@ -2283,7 +2251,7 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, } static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { /* Copy the inline data to the data field */ @@ -2313,7 +2281,7 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, } static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev, - struct ib_send_wr *wr, + const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { int payload_sz = 0; @@ -2345,7 +2313,7 @@ static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp) static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct bnxt_qplib_swqe wqe; int rc = 0, payload_sz = 0; @@ -2393,8 +2361,8 @@ bad: return rc; } -int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_swqe wqe; @@ -2441,7 +2409,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, default: break; } - /* Fall thru to build the wqe */ + /* fall through */ case IB_WR_SEND_WITH_INV: rc = bnxt_re_build_send_wqe(qp, wr, &wqe); break; @@ -2493,7 +2461,7 @@ bad: static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { struct bnxt_qplib_swqe wqe; int rc = 0; @@ -2526,8 +2494,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, return rc; } -int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_swqe wqe; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 5c6414cad4af..aa33e7b82c84 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str); int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey); int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context); -int bnxt_re_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context); +int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context); int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid); enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, @@ -182,8 +181,8 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int bnxt_re_destroy_srq(struct ib_srq *srq); -int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); +int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -192,10 +191,10 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int bnxt_re_destroy_qp(struct ib_qp *qp); -int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); -int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); +int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr); +int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 50d8f1fc98d5..e426b990c1dd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2354,7 +2354,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, srq = qp->srq; if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2369,7 +2369,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); dev_err(&cq->hwq.pdev->dev, @@ -2437,7 +2437,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, if (!srq) return -EINVAL; - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2452,7 +2452,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); dev_err(&cq->hwq.pdev->dev, @@ -2546,7 +2546,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, "QPLIB: FP: SRQ used but not defined??"); return -EINVAL; } - if (wr_id_idx > srq->hwq.max_elements) { + if (wr_id_idx >= srq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 "); dev_err(&cq->hwq.pdev->dev, @@ -2561,7 +2561,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, *pcqe = cqe; } else { rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { + if (wr_id_idx >= rq->hwq.max_elements) { dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); dev_err(&cq->hwq.pdev->dev, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 2f3f32eaa1d5..4097f3fa25c5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -197,7 +197,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid) { - if (index > sgid_tbl->max) { + if (index >= sgid_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded SGID table max (%d)", index, sgid_tbl->max); @@ -402,7 +402,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, *pkey = 0xFFFF; return 0; } - if (index > pkey_tbl->max) { + if (index >= pkey_tbl->max) { dev_err(&res->pdev->dev, "QPLIB: Index %d exceeded PKEY table max (%d)", index, pkey_tbl->max); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index 0a8542c20804..a098c0140580 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -32,38 +32,16 @@ #include "iwch_provider.h" #include "iwch.h" -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) +static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct iwch_qp *qhp, struct ib_wc *wc) { - struct iwch_qp *qhp = NULL; - struct t3_cqe cqe, *rd_cqe; - struct t3_wq *wq; + struct t3_wq *wq = qhp ? &qhp->wq : NULL; + struct t3_cqe cqe; u32 credit = 0; u8 cqe_flushed; u64 cookie; int ret = 1; - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (t3a_device(chp->rhp) && credit) { @@ -79,7 +57,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, ret = 1; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(cqe); wc->wc_flags = 0; @@ -182,8 +160,38 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from cxio and map it to openib. + * + * Returns: + * 0 EMPTY; + * 1 cqe returned + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct ib_wc *wc) +{ + struct iwch_qp *qhp; + struct t3_cqe *rd_cqe; + int ret; + + rd_cqe = cxio_next_cqe(&chp->cq); + + if (!rd_cqe) + return 0; + + qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); spin_unlock(&qhp->lock); + } else { + ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); + } return ret; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index be097c6723c0..1b9ff21aa1d5 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -61,42 +61,6 @@ #include <rdma/cxgb3-abi.h> #include "common.h" -static struct ib_ah *iwch_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) -{ - return ERR_PTR(-ENOSYS); -} - -static int iwch_ah_destroy(struct ib_ah *ah) -{ - return -ENOSYS; -} - -static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int iwch_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - return -ENOSYS; -} - static int iwch_dealloc_ucontext(struct ib_ucontext *context) { struct iwch_dev *rhp = to_iwch_dev(context->device); @@ -1103,7 +1067,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; - props->max_sge = dev->attr.max_sge_per_wr; + props->max_send_sge = dev->attr.max_sge_per_wr; + props->max_recv_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; @@ -1398,8 +1363,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.mmap = iwch_mmap; dev->ibdev.alloc_pd = iwch_allocate_pd; dev->ibdev.dealloc_pd = iwch_deallocate_pd; - dev->ibdev.create_ah = iwch_ah_create; - dev->ibdev.destroy_ah = iwch_ah_destroy; dev->ibdev.create_qp = iwch_create_qp; dev->ibdev.modify_qp = iwch_ib_modify_qp; dev->ibdev.destroy_qp = iwch_destroy_qp; @@ -1414,9 +1377,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dealloc_mw = iwch_dealloc_mw; dev->ibdev.alloc_mr = iwch_alloc_mr; dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.attach_mcast = iwch_multicast_attach; - dev->ibdev.detach_mcast = iwch_multicast_detach; - dev->ibdev.process_mad = iwch_process_mad; dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 2e38ddefea8a..8adbe9658935 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -326,10 +326,10 @@ enum iwch_qp_query_flags { }; u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); int iwch_post_zb_read(struct iwch_ep *ep); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 3871e1fd8395..c649faad63f9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -39,8 +39,8 @@ #define NO_SUPPORT -1 -static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, - u8 * flit_cnt) +static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { int i; u32 plen; @@ -84,8 +84,8 @@ static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { int i; u32 plen; @@ -125,8 +125,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { if (wr->num_sge > 1) return -EINVAL; @@ -146,8 +146,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { struct iwch_mr *mhp = to_iwch_mr(wr->mr); int i; @@ -189,8 +189,8 @@ static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, return 0; } -static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, - u8 *flit_cnt) +static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, + u8 *flit_cnt) { wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); wqe->local_inv.reserved = 0; @@ -246,7 +246,7 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, } static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { int i, err = 0; u32 pbl_addr[T3_MAX_SGE]; @@ -286,7 +286,7 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, } static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - struct ib_recv_wr *wr) + const struct ib_recv_wr *wr) { int i; u32 pbl_addr; @@ -348,8 +348,8 @@ static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, return 0; } -int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; u8 uninitialized_var(t3_wr_flit_cnt); @@ -463,8 +463,8 @@ out: return err; } -int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct iwch_qp *qhp; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0912fa026327..0f83cbec33f3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -587,24 +587,29 @@ static int send_flowc(struct c4iw_ep *ep) { struct fw_flowc_wr *flowc; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int i; u16 vlan = ep->l2t->vlan; int nparams; + int flowclen, flowclen16; if (WARN_ON(!skb)) return -ENOMEM; if (vlan == CPL_L2T_VLAN_NONE) - nparams = 8; - else nparams = 9; + else + nparams = 10; - flowc = __skb_put(skb, FLOWC_LEN); + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; + + flowc = __skb_put(skb, flowclen); + memset(flowc, 0, flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, - 16)) | FW_WR_FLOWID_V(ep->hwtid)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V @@ -623,21 +628,13 @@ static int send_flowc(struct c4iw_ep *ep) flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); - if (nparams == 9) { + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; + flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale); + if (nparams == 10) { u16 pri; - pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; - flowc->mnemval[8].val = cpu_to_be32(pri); - } else { - /* Pad WR to 16 byte boundary */ - flowc->mnemval[8].mnemonic = 0; - flowc->mnemval[8].val = 0; - } - for (i = 0; i < 9; i++) { - flowc->mnemval[i].r4[0] = 0; - flowc->mnemval[i].r4[1] = 0; - flowc->mnemval[i].r4[2] = 0; + flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[9].val = cpu_to_be32(pri); } set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); @@ -1176,6 +1173,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; struct cpl_act_establish *req = cplhdr(skb); + unsigned short tcp_opt = ntohs(req->tcp_opt); unsigned int tid = GET_TID(req); unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); struct tid_info *t = dev->rdev.lldi.tids; @@ -1196,8 +1194,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); /* dealloc the atid */ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); @@ -1853,10 +1852,33 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void complete_cached_srq_buffers(struct c4iw_ep *ep, + __be32 srqidx_status) +{ + enum chip_type adapter_type; + u32 srqidx; + + adapter_type = ep->com.dev->rdev.lldi.adapter_type; + srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status)); + + /* + * If this TCB had a srq buffer cached, then we must complete + * it. For user mode, that means saving the srqidx in the + * user/kernel status page for this qp. For kernel mode, just + * synthesize the CQE now. + */ + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { + if (ep->com.qp->ibqp.uobject) + t4_set_wq_in_error(&ep->com.qp->wq, srqidx); + else + c4iw_flush_srqidx(ep->com.qp, srqidx); + } +} + static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; - struct cpl_abort_rpl_rss *rpl = cplhdr(skb); + struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); int release = 0; unsigned int tid = GET_TID(rpl); @@ -1865,6 +1887,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) pr_warn("Abort rpl to freed endpoint\n"); return 0; } + + complete_cached_srq_buffers(ep, rpl->srqidx_status); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); switch (ep->com.state) { @@ -2603,16 +2628,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_pass_establish *req = cplhdr(skb); unsigned int tid = GET_TID(req); int ret; + u16 tcp_opt = ntohs(req->tcp_opt); ep = get_ep_from_tid(dev, tid); pr_debug("ep %p tid %u\n", ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, - ntohs(req->tcp_opt)); + pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); dst_confirm(ep->dst); mutex_lock(&ep->com.mutex); @@ -2719,28 +2745,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { - struct cpl_abort_req_rss *req = cplhdr(skb); + struct cpl_abort_req_rss6 *req = cplhdr(skb); struct c4iw_ep *ep; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u8 status; + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (cxgb_is_neg_adv(req->status)) { + status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status)); + + if (cxgb_is_neg_adv(status)) { pr_debug("Negative advice on abort- tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + ep->hwtid, status, neg_adv_str(status)); ep->stats.abort_neg_adv++; mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.neg_adv++; mutex_unlock(&dev->rdev.stats.lock); goto deref_ep; } + + complete_cached_srq_buffers(ep, req->srqidx_status); + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); set_bit(PEER_ABORT, &ep->com.history); @@ -3444,9 +3477,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 2be2e1ac1b5f..6d3042794094 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, int user = (uctx != &rdev->uctx); int ret; struct sk_buff *skb; + struct c4iw_ucontext *ucontext = NULL; + + if (user) + ucontext = container_of(uctx, struct c4iw_ucontext, uctx); cq->cqid = c4iw_get_cqid(rdev, uctx); if (!cq->cqid) { @@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); + if (user && ucontext->is_32b_cqe) { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + (sizeof(*cq->queue) / 2)))->qp_err; + } else { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + sizeof(*cq->queue)))->qp_err; + } + /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + sizeof *res; @@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_IQPCIECH_V(2) | FW_RI_RES_WR_IQINTCNTTHRESH_V(0) | FW_RI_RES_WR_IQO_F | - FW_RI_RES_WR_IQESIZE_V(1)); + ((user && ucontext->is_32b_cqe) ? + FW_RI_RES_WR_IQESIZE_V(1) : + FW_RI_RES_WR_IQESIZE_V(2))); res->u.cq.iqsize = cpu_to_be16(cq->size); res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); @@ -166,7 +182,7 @@ err1: return ret; } -static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx) { struct t4_cqe cqe; @@ -179,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) CQE_SWCQE_V(1) | CQE_QPID_V(wq->sq.qid)); cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + if (srqidx) + cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx); cq->sw_queue[cq->sw_pidx] = cqe; t4_swcq_produce(cq); } @@ -191,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", wq, cq, wq->rq.in_use, count); while (in_use--) { - insert_recv_cqe(wq, cq); + insert_recv_cqe(wq, cq, 0); flushed++; } return flushed; @@ -442,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) pr_debug("cq %p count %d\n", cq, *count); } +static void post_pending_srq_wrs(struct t4_srq *srq) +{ + struct t4_srq_pending_wr *pwr; + u16 idx = 0; + + while (srq->pending_in_use) { + pwr = &srq->pending_wrs[srq->pending_cidx]; + srq->sw_rq[srq->pidx].wr_id = pwr->wr_id; + srq->sw_rq[srq->pidx].valid = 1; + + pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, + srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + (unsigned long long)pwr->wr_id); + + c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16); + t4_srq_consume_pending_wr(srq); + t4_srq_produce(srq, pwr->len16); + idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE); + } + + if (idx) { + t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe); + srq->queue[srq->size].status.host_wq_pidx = + srq->wq_pidx; + } +} + +static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq) +{ + int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx; + u64 wr_id; + + srq->sw_rq[rel_idx].valid = 0; + wr_id = srq->sw_rq[rel_idx].wr_id; + + if (rel_idx == srq->cidx) { + pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_consume(srq); + while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) { + pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, + srq->size, srq->ooo_count, + (unsigned long long) + srq->sw_rq[srq->cidx].wr_id); + t4_srq_consume_ooo(srq); + } + if (srq->ooo_count == 0 && srq->pending_in_use) + post_pending_srq_wrs(srq); + } else { + pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, + srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + srq->ooo_count, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_produce_ooo(srq); + } + return wr_id; +} + /* * poll_cq * @@ -459,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) * -EOVERFLOW CQ overflow detected. */ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) + u8 *cqe_flushed, u64 *cookie, u32 *credit, + struct t4_srq *srq) { int ret = 0; struct t4_cqe *hw_cqe, read_cqe; @@ -524,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_TYPE(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -535,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_WRID_STAG(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -560,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); } /* @@ -574,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, * then we complete this with T4_ERR_MSN and mark the wq in * error. */ - - if (t4_rq_empty(wq)) { - t4_set_wq_in_error(wq); - ret = -EAGAIN; - goto skip_cqe; - } if (unlikely(!CQE_STATUS(hw_cqe) && CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; @@ -641,11 +720,16 @@ proc_cqe: c4iw_log_wr_stats(wq, hw_cqe); t4_sq_consume(wq); } else { - pr_debug("completing rq idx %u\n", wq->rq.cidx); - *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; - if (c4iw_wr_log) - c4iw_log_wr_stats(wq, hw_cqe); - t4_rq_consume(wq); + if (!srq) { + pr_debug("completing rq idx %u\n", wq->rq.cidx); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); + t4_rq_consume(wq); + } else { + *cookie = reap_srq_cqe(hw_cqe, srq); + } + wq->rq.msn++; goto skip_cqe; } @@ -668,46 +752,33 @@ skip_cqe: return ret; } -/* - * Get one cq entry from c4iw and map it to openib. - * - * Returns: - * 0 cqe returned - * -ENODATA EMPTY; - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, + struct ib_wc *wc, struct c4iw_srq *srq) { - struct c4iw_qp *qhp = NULL; - struct t4_cqe uninitialized_var(cqe), *rd_cqe; - struct t4_wq *wq; + struct t4_cqe uninitialized_var(cqe); + struct t4_wq *wq = qhp ? &qhp->wq : NULL; u32 credit = 0; u8 cqe_flushed; u64 cookie = 0; int ret; - ret = t4_next_cqe(&chp->cq, &rd_cqe); - - if (ret) - return ret; - - qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } - ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit, + srq ? &srq->wq : NULL); if (ret) goto out; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; + /* + * Simulate a SRQ_LIMIT_REACHED HW notification if required. + */ + if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed && + srq->wq.in_use < srq->srq_limit) + c4iw_dispatch_srq_limit_reached_event(srq); + pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe), @@ -720,15 +791,32 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) wc->byte_len = CQE_LEN(&cqe); else wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || - CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { + + switch (CQE_OPCODE(&cqe)) { + case FW_RI_SEND: + wc->opcode = IB_WC_RECV; + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_RECV; wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + break; + case FW_RI_WRITE_IMMEDIATE: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->ex.imm_data = CQE_IMM_DATA(&cqe); + wc->wc_flags |= IB_WC_WITH_IMM; + break; + default: + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; } } else { switch (CQE_OPCODE(&cqe)) { + case FW_RI_WRITE_IMMEDIATE: case FW_RI_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; @@ -819,8 +907,43 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from c4iw and map it to openib. + * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_srq *srq = NULL; + struct c4iw_qp *qhp = NULL; + struct t4_cqe *rd_cqe; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + srq = qhp->srq; + if (srq) + spin_lock(&srq->lock); + ret = __c4iw_poll_cq_one(chp, qhp, wc, srq); spin_unlock(&qhp->lock); + if (srq) + spin_unlock(&srq->lock); + } else { + ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL); + } return ret; } @@ -876,6 +999,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; + struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; int ret, wr_len; @@ -891,9 +1015,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (vector >= rhp->rdev.lldi.nciq) return ERR_PTR(-EINVAL); + if (ib_context) { + ucontext = to_c4iw_ucontext(ib_context); + if (udata->inlen < sizeof(ucmd)) + ucontext->is_32b_cqe = 1; + } + chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); + chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!chp->wr_waitp) { ret = -ENOMEM; @@ -908,9 +1039,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, goto err_free_wr_wait; } - if (ib_context) - ucontext = to_c4iw_ucontext(ib_context); - /* account for the status page. */ entries++; @@ -934,13 +1062,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (hwentries < 64) hwentries = 64; - memsize = hwentries * sizeof *chp->cq.queue; + memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ? + (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue)); /* * memsize must be a multiple of the page size if its a user cq. */ if (ucontext) memsize = roundup(memsize, PAGE_SIZE); + chp->cq.size = hwentries; chp->cq.memsize = memsize; chp->cq.vector = vector; @@ -971,6 +1101,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!mm2) goto err_free_mm; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; @@ -980,9 +1111,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ucontext->key += PAGE_SIZE; uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; + /* communicate to the userspace that + * kernel driver supports 64B CQE + */ + uresp.flags |= C4IW_64B_CQE; + spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, - sizeof(uresp) - sizeof(uresp.reserved)); + ucontext->is_32b_cqe ? + sizeof(uresp) - sizeof(uresp.flags) : + sizeof(uresp)); if (ret) goto err_free_mm2; @@ -1019,11 +1157,6 @@ err_free_chp: return ERR_PTR(ret); } -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - return -ENOSYS; -} - int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; @@ -1039,3 +1172,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) spin_unlock_irqrestore(&chp->lock, flag); return ret; } + +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx) +{ + struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + unsigned long flag; + + /* locking heirarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + spin_lock(&qhp->lock); + + /* create a SRQ RECV CQE for srqidx */ + insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx); + + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); +} diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a3c3418afd73..c13c0ba30f63 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void *data) set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin); cc = snprintf(qpd->buf + qpd->pos, space, - "rc qp sq id %u rq id %u state %u " + "rc qp sq id %u %s id %u state %u " "onchip %u ep tid %u state %u " "%pI4:%u/%u->%pI4:%u/%u\n", - qp->wq.sq.qid, qp->wq.rq.qid, + qp->wq.sq.qid, qp->srq ? "srq" : "rq", + qp->srq ? qp->srq->idx : qp->wq.rq.qid, (int)qp->attr.state, qp->wq.sq.flags & T4_SQ_ONCHIP, ep->hwtid, (int)ep->com.state, @@ -480,6 +481,9 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " SRQS: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur, + dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail); seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); @@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.pbl.fail = 0; dev->rdev.stats.rqt.max = 0; dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; dev->rdev.stats.ocqp.max = 0; dev->rdev.stats.ocqp.fail = 0; dev->rdev.stats.db_full = 0; @@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->qpmask = rdev->lldi.udb_density - 1; rdev->cqmask = rdev->lldi.ucq_density - 1; - pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n", + pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n", pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), rdev->lldi.vr->pbl.start, @@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.start, rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, - rdev->lldi.vr->cq.size); + rdev->lldi.vr->cq.size, + rdev->lldi.vr->srq.size); pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n", &rdev->lldi.pdev->resource[2], rdev->lldi.db_reg, rdev->lldi.gts_reg, @@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->stats.stag.total = rdev->lldi.vr->stag.size; rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; rdev->stats.rqt.total = rdev->lldi.vr->rq.size; + rdev->stats.srqt.total = rdev->lldi.vr->srq.size; rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; rdev->stats.qid.total = rdev->lldi.vr->qp.size; - err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), + T4_MAX_NUM_PD, rdev->lldi.vr->srq.size); if (err) { pr_err("error %d initializing resources\n", err); return err; @@ -857,6 +866,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; rdev->status_page->cq_size = rdev->lldi.vr->cq.size; + rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support; if (c4iw_wr_log) { rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order, diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 3e9d8b277ab9..8741d23168f3 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - pr_debug("%016llx %016llx %016llx %016llx\n", + pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n", be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]), - be64_to_cpu(p[3])); + be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]), + be64_to_cpu(p[6]), be64_to_cpu(p[7])); /* * Ingress WRITE and READ_RESP errors provide diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 870649ff049c..f0fceadd0d12 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -97,6 +97,7 @@ struct c4iw_resource { struct c4iw_id_table tpt_table; struct c4iw_id_table qid_table; struct c4iw_id_table pdid_table; + struct c4iw_id_table srq_table; }; struct c4iw_qid_list { @@ -130,6 +131,8 @@ struct c4iw_stats { struct c4iw_stat stag; struct c4iw_stat pbl; struct c4iw_stat rqt; + struct c4iw_stat srqt; + struct c4iw_stat srq; struct c4iw_stat ocqp; u64 db_full; u64 db_empty; @@ -549,6 +552,7 @@ struct c4iw_qp { struct kref kref; wait_queue_head_t wait; int sq_sig_all; + struct c4iw_srq *srq; struct work_struct free_work; struct c4iw_ucontext *ucontext; struct c4iw_wr_wait *wr_waitp; @@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) return container_of(ibqp, struct c4iw_qp, ibqp); } +struct c4iw_srq { + struct ib_srq ibsrq; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct t4_srq wq; + struct sk_buff *destroy_skb; + u32 srq_limit; + u32 pdid; + int idx; + u32 flags; + spinlock_t lock; /* protects srq */ + struct c4iw_wr_wait *wr_waitp; + bool armed; +}; + +static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct c4iw_srq, ibsrq); +} + struct c4iw_ucontext { struct ib_ucontext ibucontext; struct c4iw_dev_ucontext uctx; @@ -566,6 +590,7 @@ struct c4iw_ucontext { spinlock_t mmap_lock; struct list_head mmaps; struct kref kref; + bool is_32b_cqe; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -885,7 +910,10 @@ enum conn_pre_alloc_buffers { CN_MAX_CON_BUF }; -#define FLOWC_LEN 80 +enum { + FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX]) +}; + union cpl_wr_size { struct cpl_abort_req abrt_req; struct cpl_abort_rpl abrt_rpl; @@ -952,6 +980,7 @@ struct c4iw_ep { unsigned int retry_count; int snd_win; int rcv_win; + u32 snd_wscale; struct c4iw_ep_stats stats; }; @@ -988,7 +1017,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, struct c4iw_dev_ucontext *uctx); u32 c4iw_get_resource(struct c4iw_id_table *id_table); void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); int c4iw_rqtpool_create(struct c4iw_rdev *rdev); @@ -1007,10 +1037,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); int c4iw_destroy_listen(struct iw_cm_id *cm_id); @@ -1037,8 +1067,14 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata); -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq); +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, @@ -1075,12 +1111,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, enum cxgb4_bar2_qtype qtype, unsigned int *pbar2_qid, u64 *pbar2_pa); +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev); +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq); +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1feade8bb4b3..4eda6872e617 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -58,41 +58,6 @@ static int fastreg_support = 1; module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); -static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) - -{ - return ERR_PTR(-ENOSYS); -} - -static int c4iw_ah_destroy(struct ib_ah *ah) -{ - return -ENOSYS; -} - -static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - return -ENOSYS; -} - void _c4iw_free_ucontext(struct kref *kref) { struct c4iw_ucontext *ucontext; @@ -342,8 +307,12 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; props->max_mr_size = T4_MAX_MR_SIZE; props->max_qp = dev->rdev.lldi.vr->qp.size / 2; + props->max_srq = dev->rdev.lldi.vr->srq.size; props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; - props->max_sge = T4_MAX_RECV_SGE; + props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth; + props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE); + props->max_recv_sge = T4_MAX_RECV_SGE; + props->max_srq_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, @@ -592,7 +561,10 @@ void c4iw_register_device(struct work_struct *work) (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); @@ -608,15 +580,15 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.mmap = c4iw_mmap; dev->ibdev.alloc_pd = c4iw_allocate_pd; dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_ah = c4iw_ah_create; - dev->ibdev.destroy_ah = c4iw_ah_destroy; dev->ibdev.create_qp = c4iw_create_qp; dev->ibdev.modify_qp = c4iw_ib_modify_qp; dev->ibdev.query_qp = c4iw_ib_query_qp; dev->ibdev.destroy_qp = c4iw_destroy_qp; + dev->ibdev.create_srq = c4iw_create_srq; + dev->ibdev.modify_srq = c4iw_modify_srq; + dev->ibdev.destroy_srq = c4iw_destroy_srq; dev->ibdev.create_cq = c4iw_create_cq; dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.resize_cq = c4iw_resize_cq; dev->ibdev.poll_cq = c4iw_poll_cq; dev->ibdev.get_dma_mr = c4iw_get_dma_mr; dev->ibdev.reg_user_mr = c4iw_reg_user_mr; @@ -625,12 +597,10 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.dealloc_mw = c4iw_dealloc_mw; dev->ibdev.alloc_mr = c4iw_alloc_mr; dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.attach_mcast = c4iw_multicast_attach; - dev->ibdev.detach_mcast = c4iw_multicast_detach; - dev->ibdev.process_mad = c4iw_process_mad; dev->ibdev.req_notify_cq = c4iw_arm_cq; dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; + dev->ibdev.post_srq_recv = c4iw_post_srq_recv; dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index aef53305f1c3..b3203afa3b1d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user) } static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, int has_rq) { /* * uP clears EQ contexts when the connection exits rdma mode, * so no need to post a RESET WR for these EQs. */ - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); dealloc_sq(rdev, &wq->sq); - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); - kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); - c4iw_put_qpid(rdev, wq->rq.qid, uctx); c4iw_put_qpid(rdev, wq->sq.qid, uctx); + + if (has_rq) { + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + kfree(wq->rq.sw_rq); + c4iw_put_qpid(rdev, wq->rq.qid, uctx); + } return 0; } @@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx, - struct c4iw_wr_wait *wr_waitp) + struct c4iw_wr_wait *wr_waitp, + int need_rq) { int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; @@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (!wq->sq.qid) return -ENOMEM; - wq->rq.qid = c4iw_get_qpid(rdev, uctx); - if (!wq->rq.qid) { - ret = -ENOMEM; - goto free_sq_qid; + if (need_rq) { + wq->rq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->rq.qid) { + ret = -ENOMEM; + goto free_sq_qid; + } } if (!user) { @@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, GFP_KERNEL); if (!wq->sq.sw_sq) { ret = -ENOMEM; - goto free_rq_qid; + goto free_rq_qid;//FIXME } - wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq), - GFP_KERNEL); - if (!wq->rq.sw_rq) { - ret = -ENOMEM; - goto free_sw_sq; + if (need_rq) { + wq->rq.sw_rq = kcalloc(wq->rq.size, + sizeof(*wq->rq.sw_rq), + GFP_KERNEL); + if (!wq->rq.sw_rq) { + ret = -ENOMEM; + goto free_sw_sq; + } } } - /* - * RQT must be a power of 2 and at least 16 deep. - */ - wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); - wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); - if (!wq->rq.rqt_hwaddr) { - ret = -ENOMEM; - goto free_sw_rq; + if (need_rq) { + /* + * RQT must be a power of 2 and at least 16 deep. + */ + wq->rq.rqt_size = + roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); + wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); + if (!wq->rq.rqt_hwaddr) { + ret = -ENOMEM; + goto free_sw_rq; + } } ret = alloc_sq(rdev, &wq->sq, user); @@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); - wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, &(wq->rq.dma_addr), - GFP_KERNEL); - if (!wq->rq.queue) { - ret = -ENOMEM; - goto free_sq; + if (need_rq) { + wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, + &wq->rq.dma_addr, + GFP_KERNEL); + if (!wq->rq.queue) { + ret = -ENOMEM; + goto free_sq; + } + pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + wq->sq.queue, + (unsigned long long)virt_to_phys(wq->sq.queue), + wq->rq.queue, + (unsigned long long)virt_to_phys(wq->rq.queue)); + memset(wq->rq.queue, 0, wq->rq.memsize); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); } - pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", - wq->sq.queue, - (unsigned long long)virt_to_phys(wq->sq.queue), - wq->rq.queue, - (unsigned long long)virt_to_phys(wq->rq.queue)); - memset(wq->rq.queue, 0, wq->rq.memsize); - dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, &wq->sq.bar2_qid, user ? &wq->sq.bar2_pa : NULL); - wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, - &wq->rq.bar2_qid, - user ? &wq->rq.bar2_pa : NULL); + if (need_rq) + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, + T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); /* * User mode must have bar2 access. */ - if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { + if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) { pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n", pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); goto free_dma; @@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; - + if (need_rq) + wr_len += sizeof(*res); skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; @@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res_wr = __skb_put_zero(skb, wr_len); res_wr->op_nres = cpu_to_be32( FW_WR_OP_V(FW_RI_RES_WR) | - FW_RI_RES_WR_NRES_V(2) | + FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (uintptr_t)wr_waitp; @@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_EQSIZE_V(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); - res++; - res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; - res->u.sqrq.op = FW_RI_RES_OP_WRITE; - /* - * eqsize is the number of 64B entries plus the status page size. - */ - eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + - rdev->hw_queue.t4_eq_status_entries; - res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( - FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ - FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ - FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ - FW_RI_RES_WR_IQID_V(rcq->cqid)); - res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( - FW_RI_RES_WR_DCAEN_V(0) | - FW_RI_RES_WR_DCACPU_V(0) | - FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(3) | - FW_RI_RES_WR_CIDXFTHRESHO_V(0) | - FW_RI_RES_WR_CIDXFTHRESH_V(0) | - FW_RI_RES_WR_EQSIZE_V(eqsize)); - res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); - res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + if (need_rq) { + res++; + res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size + */ + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.sqrq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + /* don't keep in chip cache */ + FW_RI_RES_WR_CPRIO_V(0) | + /* set by uP at ri_init time */ + FW_RI_RES_WR_PCIECHN_V(0) | + FW_RI_RES_WR_IQID_V(rcq->cqid)); + res->u.sqrq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + } c4iw_init_wr_wait(wr_waitp); ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__); @@ -363,26 +387,30 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, return 0; free_dma: - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); + if (need_rq) + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); free_sq: dealloc_sq(rdev, &wq->sq); free_hwaddr: - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + if (need_rq) + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); free_sw_rq: - kfree(wq->rq.sw_rq); + if (need_rq) + kfree(wq->rq.sw_rq); free_sw_sq: kfree(wq->sq.sw_sq); free_rq_qid: - c4iw_put_qpid(rdev, wq->rq.qid, uctx); + if (need_rq) + c4iw_put_qpid(rdev, wq->rq.qid, uctx); free_sq_qid: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return ret; } static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, - struct ib_send_wr *wr, int max, u32 *plenp) + const struct ib_send_wr *wr, int max, u32 *plenp) { u8 *dstp, *srcp; u32 plen = 0; @@ -427,7 +455,12 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, { int i; u32 plen = 0; - __be64 *flitp = (__be64 *)isglp->sge; + __be64 *flitp; + + if ((__be64 *)isglp == queue_end) + isglp = (struct fw_ri_isgl *)queue_start; + + flitp = (__be64 *)isglp->sge; for (i = 0; i < num_sge; i++) { if ((plen + sg_list[i].length) < plen) @@ -452,7 +485,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, } static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -519,7 +552,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, } static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -527,7 +560,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; - wqe->write.r2 = 0; + + /* + * iWARP protocol supports 64 bit immediate data but rdma api + * limits it to 32bit. + */ + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data; + else + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { @@ -561,7 +602,58 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp, + struct ib_send_wr *wr) +{ + memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16); + memset(immdp->r1, 0, 6); + immdp->op = FW_RI_DATA_IMMD; + immdp->immdlen = 16; +} + +static void build_rdma_write_cmpl(struct t4_sq *sq, + struct fw_ri_rdma_write_cmpl_wr *wcwr, + const struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + + /* + * This code assumes the struct fields preceding the write isgl + * fit in one 64B WR slot. This is because the WQE is built + * directly in the dma queue, and wrapping is only handled + * by the code buildling sgls. IE the "fixed part" of the wr + * structs must all fit in 64B. The WQE build code should probably be + * redesigned to avoid this restriction, but for now just add + * the BUILD_BUG_ON() to catch if this WQE struct gets too big. + */ + BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64); + + wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); + wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey); + wcwr->r2 = 0; + wcwr->r3 = 0; + + /* SEND_INV SGL */ + if (wr->next->send_flags & IB_SEND_INLINE) + build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next); + else + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL); + + /* WRITE SGL */ + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen); + + size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + wcwr->plen = cpu_to_be32(plen); + *len16 = DIV_ROUND_UP(size, 16); +} + +static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { if (wr->num_sge > 1) return -EINVAL; @@ -590,8 +682,74 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) return 0; } +static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr) +{ + bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + struct t4_swsqe *swsqe; + union t4_wr *wqe; + u16 write_wrid; + u8 len16; + u16 idx; + + /* + * The sw_sq entries still look like a WRITE and a SEND and consume + * 2 slots. The FW WR, however, will be a single uber-WR. + */ + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16); + + /* WRITE swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_RDMA_WRITE; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = write_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + write_wrid = qhp->wq.sq.pidx; + + /* just bump the sw_sq */ + qhp->wq.sq.in_use++; + if (++qhp->wq.sq.pidx == qhp->wq.sq.size) + qhp->wq.sq.pidx = 0; + + /* SEND_WITH_INV swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_SEND_WITH_INV; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = send_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->next->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0; + wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx; + + init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR, + write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16); + t4_sq_produce(&qhp->wq, len16); + idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + + t4_ring_sq_db(&qhp->wq, idx, wqe); +} + static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, - struct ib_recv_wr *wr, u8 *len16) + const struct ib_recv_wr *wr, u8 *len16) { int ret; @@ -605,8 +763,22 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr, + u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1), + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof(wqe->recv) + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16) { __be64 *p = (__be64 *)fr->pbl; @@ -638,8 +810,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, } static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, - bool dsgl_supported) + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16, bool dsgl_supported) { struct fw_ri_immd *imdp; __be64 *p; @@ -701,7 +873,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; @@ -721,7 +894,7 @@ static void free_qp_work(struct work_struct *work) pr_debug("qhp %p ucontext %p\n", qhp, ucontext); destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq); if (ucontext) c4iw_put_ucontext(ucontext); @@ -804,6 +977,9 @@ static int ib_to_fw_opcode(int ib_opcode) case IB_WR_RDMA_WRITE: opcode = FW_RI_RDMA_WRITE; break; + case IB_WR_RDMA_WRITE_WITH_IMM: + opcode = FW_RI_WRITE_IMMEDIATE; + break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: opcode = FW_RI_READ_REQ; @@ -820,7 +996,8 @@ static int ib_to_fw_opcode(int ib_opcode) return opcode; } -static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int complete_sq_drain_wr(struct c4iw_qp *qhp, + const struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; @@ -858,8 +1035,9 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) return 0; } -static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int ret = 0; @@ -874,7 +1052,8 @@ static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, return ret; } -static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wr(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *rchp; @@ -906,7 +1085,8 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { while (wr) { complete_rq_drain_wr(qhp, wr); @@ -914,14 +1094,15 @@ static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; u8 len16 = 0; enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; + struct c4iw_dev *rhp; union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; @@ -929,6 +1110,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u16 idx = 0; qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; spin_lock_irqsave(&qhp->lock, flag); /* @@ -946,6 +1128,30 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, *bad_wr = wr; return -ENOMEM; } + + /* + * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is + * the response for small NVMEe-oF READ requests. If the chain is + * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths + * meet the requirements of the fw_ri_write_cmpl_wr work request, + * then build and post the write_cmpl WR. If any of the tests + * below are not true, then we continue on with the tradtional WRITE + * and SEND WRs. + */ + if (qhp->rhp->rdev.lldi.write_cmpl_support && + CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >= + CHELSIO_T5 && + wr && wr->next && !wr->next->next && + wr->opcode == IB_WR_RDMA_WRITE && + wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL && + wr->next->opcode == IB_WR_SEND_WITH_INV && + wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE && + wr->next->num_sge == 1 && num_wrs >= 2) { + post_write_cmpl(qhp, wr); + spin_unlock_irqrestore(&qhp->lock, flag); + return 0; + } + while (wr) { if (num_wrs == 0) { err = -ENOMEM; @@ -973,6 +1179,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->opcode = FW_RI_SEND_WITH_INV; err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) { + err = -EINVAL; + break; + } + fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; + /*FALLTHROUGH*/ case IB_WR_RDMA_WRITE: fw_opcode = FW_RI_RDMA_WRITE_WR; swsqe->opcode = FW_RI_RDMA_WRITE; @@ -983,8 +1196,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { - c4iw_invalidate_mr(qhp->rhp, - wr->sg_list[0].lkey); + c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; } else { fw_flags = 0; @@ -1000,7 +1212,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); swsqe->opcode = FW_RI_FAST_REGISTER; - if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support && !mhp->attr.state && mhp->mpl_len <= 2) { fw_opcode = FW_RI_FR_NSMR_TPTE_WR; build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), @@ -1009,7 +1221,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_FR_NSMR_WR; err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), mhp, &len16, - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + rhp->rdev.lldi.ulptx_memwrite_dsgl); if (err) break; } @@ -1022,7 +1234,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; err = build_inv_stag(wqe, wr, &len16); - c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); + c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey); break; default: pr_warn("%s post of type=%d TBD!\n", __func__, @@ -1041,7 +1253,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->wr_id = wr->wr_id; if (c4iw_wr_log) { swsqe->sge_ts = cxgb4_read_sge_timestamp( - qhp->rhp->rdev.lldi.ports[0]); + rhp->rdev.lldi.ports[0]); swsqe->host_time = ktime_get(); } @@ -1055,7 +1267,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (!qhp->rhp->rdev.status_page->db_off) { + if (!rhp->rdev.status_page->db_off) { t4_ring_sq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { @@ -1065,8 +1277,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return err; } -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct c4iw_qp *qhp; @@ -1145,6 +1357,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return err; } +static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, + u64 wr_id, u8 len16) +{ + struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; + + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n", + __func__, srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->ooo_count, + (unsigned long long)wr_id, srq->pending_cidx, + srq->pending_pidx, srq->pending_in_use); + pwr->wr_id = wr_id; + pwr->len16 = len16; + memcpy(&pwr->wqe, wqe, len16 * 16); + t4_srq_produce_pending_wr(srq); +} + +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + union t4_recv_wr *wqe, lwqe; + struct c4iw_srq *srq; + unsigned long flag; + u8 len16 = 0; + u16 idx = 0; + int err = 0; + u32 num_wrs; + + srq = to_c4iw_srq(ibsrq); + spin_lock_irqsave(&srq->lock, flag); + num_wrs = t4_srq_avail(&srq->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&srq->lock, flag); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = &lwqe; + if (num_wrs) + err = build_srq_recv(wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = srq->wq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + + if (srq->wq.ooo_count || + srq->wq.pending_in_use || + srq->wq.sw_rq[srq->wq.pidx].valid) { + defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16); + } else { + srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; + srq->wq.sw_rq[srq->wq.pidx].valid = 1; + c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n", + __func__, srq->wq.cidx, + srq->wq.pidx, srq->wq.wq_pidx, + srq->wq.in_use, + (unsigned long long)wr->wr_id); + t4_srq_produce(&srq->wq, len16); + idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + } + wr = wr->next; + num_wrs--; + } + if (idx) + t4_ring_srq_db(&srq->wq, idx, len16, wqe); + spin_unlock_irqrestore(&srq->lock, flag); + return err; +} + static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { @@ -1321,7 +1616,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int rq_flushed, sq_flushed; + int rq_flushed = 0, sq_flushed; unsigned long flag; pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); @@ -1340,11 +1635,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, return; } qhp->wq.flushed = 1; - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); c4iw_flush_hw_cq(rchp, qhp); - c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + if (!qhp->srq) { + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + } if (schp != rchp) c4iw_flush_hw_cq(schp, qhp); @@ -1388,7 +1685,7 @@ static void flush_qp(struct c4iw_qp *qhp) schp = to_c4iw_cq(qhp->ibqp.send_cq); if (qhp->ibqp.uobject) { - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ -1517,16 +1814,21 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); - wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + if (qhp->srq) { + wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ | + qhp->srq->idx); + } else { + wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); + wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - + rhp->rdev.lldi.vr->rq.start); + } wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); - wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); - wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - - rhp->rdev.lldi.vr->rq.start); if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); @@ -1643,7 +1945,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1656,7 +1958,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, goto err; break; case C4IW_QP_STATE_TERMINATE: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; @@ -1673,7 +1975,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } break; case C4IW_QP_STATE_ERROR: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { abort = 1; @@ -1819,7 +2121,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - unsigned int sqsize, rqsize; + unsigned int sqsize, rqsize = 0; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; @@ -1840,11 +2142,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); - if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) - return ERR_PTR(-E2BIG); - rqsize = attrs->cap.max_recv_wr + 1; - if (rqsize < 8) - rqsize = 8; + if (!attrs->srq) { + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; + } if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); @@ -1869,19 +2173,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); qhp->wq.sq.flush_cidx = -1; - qhp->wq.rq.size = rqsize; - qhp->wq.rq.memsize = - (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * - sizeof(*qhp->wq.rq.queue); + if (!attrs->srq) { + qhp->wq.rq.size = rqsize; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); + } if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); - qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); + if (!attrs->srq) + qhp->wq.rq.memsize = + roundup(qhp->wq.rq.memsize, PAGE_SIZE); } ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, - qhp->wr_waitp); + qhp->wr_waitp, !attrs->srq); if (ret) goto err_free_wr_wait; @@ -1894,10 +2202,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; qhp->attr.sq_max_sges = attrs->cap.max_send_sge; qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + if (!attrs->srq) { + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + } qhp->attr.state = C4IW_QP_STATE_IDLE; qhp->attr.next_state = C4IW_QP_STATE_IDLE; qhp->attr.enable_rdma_read = 1; @@ -1922,21 +2232,27 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = -ENOMEM; goto err_remove_handle; } - rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); - if (!rq_key_mm) { - ret = -ENOMEM; - goto err_free_sq_key; + if (!attrs->srq) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { + ret = -ENOMEM; + goto err_free_sq_key; + } } sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); if (!sq_db_key_mm) { ret = -ENOMEM; goto err_free_rq_key; } - rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); - if (!rq_db_key_mm) { - ret = -ENOMEM; - goto err_free_sq_db_key; + if (!attrs->srq) { + rq_db_key_mm = + kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { + ret = -ENOMEM; + goto err_free_sq_db_key; + } } + memset(&uresp, 0, sizeof(uresp)); if (t4_sq_onchip(&qhp->wq.sq)) { ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), GFP_KERNEL); @@ -1945,30 +2261,35 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err_free_rq_db_key; } uresp.flags = C4IW_QPF_ONCHIP; - } else - uresp.flags = 0; + } + if (rhp->rdev.lldi.write_w_imm_support) + uresp.flags |= C4IW_QPF_WRITE_W_IMM; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; uresp.sq_memsize = qhp->wq.sq.memsize; - uresp.rqid = qhp->wq.rq.qid; - uresp.rq_size = qhp->wq.rq.size; - uresp.rq_memsize = qhp->wq.rq.memsize; + if (!attrs->srq) { + uresp.rqid = qhp->wq.rq.qid; + uresp.rq_size = qhp->wq.rq.size; + uresp.rq_memsize = qhp->wq.rq.memsize; + } spin_lock(&ucontext->mmap_lock); if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; - } else { - uresp.ma_sync_key = 0; } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } uresp.sq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_db_gts_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) @@ -1977,18 +2298,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, sq_key_mm->addr = qhp->wq.sq.phys_addr; sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); insert_mmap(ucontext, sq_key_mm); - rq_key_mm->key = uresp.rq_key; - rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); - rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); - insert_mmap(ucontext, rq_key_mm); + if (!attrs->srq) { + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + } sq_db_key_mm->key = uresp.sq_db_gts_key; sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; sq_db_key_mm->len = PAGE_SIZE; insert_mmap(ucontext, sq_db_key_mm); - rq_db_key_mm->key = uresp.rq_db_gts_key; - rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; - rq_db_key_mm->len = PAGE_SIZE; - insert_mmap(ucontext, rq_db_key_mm); + if (!attrs->srq) { + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = + (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + } if (ma_sync_key_mm) { ma_sync_key_mm->key = uresp.ma_sync_key; ma_sync_key_mm->addr = @@ -2001,7 +2327,19 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, c4iw_get_ucontext(ucontext); qhp->ucontext = ucontext; } + if (!attrs->srq) { + qhp->wq.qp_errp = + &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err; + } else { + qhp->wq.qp_errp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err; + qhp->wq.srqidxp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx; + } + qhp->ibqp.qp_num = qhp->wq.sq.qid; + if (attrs->srq) + qhp->srq = to_c4iw_srq(attrs->srq); INIT_LIST_HEAD(&qhp->db_fc_entry); pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, @@ -2011,18 +2349,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, err_free_ma_sync_key: kfree(ma_sync_key_mm); err_free_rq_db_key: - kfree(rq_db_key_mm); + if (!attrs->srq) + kfree(rq_db_key_mm); err_free_sq_db_key: kfree(sq_db_key_mm); err_free_rq_key: - kfree(rq_key_mm); + if (!attrs->srq) + kfree(rq_key_mm); err_free_sq_key: kfree(sq_key_mm); err_remove_handle: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err_destroy_qp: destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq); err_free_wr_wait: c4iw_put_wr_wait(qhp->wr_waitp); err_free_qhp: @@ -2088,6 +2428,45 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq) +{ + struct ib_event event = {}; + + event.device = &srq->rhp->ibdev; + event.element.srq = &srq->ibsrq; + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + ib_dispatch_event(&event); +} + +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); + int ret = 0; + + /* + * XXX 0 mask == a SW interrupt for srq_limit reached... + */ + if (udata && !srq_attr_mask) { + c4iw_dispatch_srq_limit_reached_event(srq); + goto out; + } + + /* no support for this yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) { + ret = -EINVAL; + goto out; + } + + if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) { + srq->armed = true; + srq->srq_limit = attr->srq_limit; + } +out: + return ret; +} + int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { @@ -2104,3 +2483,359 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } + +static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + struct sk_buff *skb = srq->destroy_skb; + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + + wr_len = sizeof(*res_wr) + sizeof(*res); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_RESET; + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.eqid = cpu_to_be32(wq->qid); + + c4iw_init_wr_wait(wr_waitp); + c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); + kfree(wq->sw_rq); + c4iw_put_qpid(rdev, wq->qid, uctx); +} + +static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + int user = (uctx != &rdev->uctx); + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + struct sk_buff *skb; + int wr_len; + int eqsize; + int ret = -ENOMEM; + + wq->qid = c4iw_get_qpid(rdev, uctx); + if (!wq->qid) + goto err; + + if (!user) { + wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq), + GFP_KERNEL); + if (!wq->sw_rq) + goto err_put_qpid; + wq->pending_wrs = kcalloc(srq->wq.size, + sizeof(*srq->wq.pending_wrs), + GFP_KERNEL); + if (!wq->pending_wrs) + goto err_free_sw_rq; + } + + wq->rqt_size = wq->size; + wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size); + if (!wq->rqt_hwaddr) + goto err_free_pending_wrs; + wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> + T4_RQT_ENTRY_SHIFT; + + wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->memsize, &wq->dma_addr, + GFP_KERNEL); + if (!wq->queue) + goto err_free_rqtpool; + + memset(wq->queue, 0, wq->memsize); + pci_unmap_addr_set(wq, mapping, wq->dma_addr); + + wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS, + &wq->bar2_qid, + user ? &wq->bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + + if (user && !wq->bar2_va) { + pr_warn(MOD "%s: srqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->qid); + ret = -EINVAL; + goto err_free_queue; + } + + /* build fw_ri_res_wr */ + wr_len = sizeof(*res_wr) + sizeof(*res); + + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + goto err_free_queue; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.srq.eqid = cpu_to_be32(wq->qid); + res->u.srq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + FW_RI_RES_WR_FETCHRO_V(0)); /* relaxed_ordering */ + res->u.srq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr); + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.pdid = cpu_to_be32(srq->pdid); + res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size); + res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr - + rdev->lldi.vr->rq.start); + + c4iw_init_wr_wait(wr_waitp); + + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__); + if (ret) + goto err_free_queue; + + pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n" + " bar2_addr %p rqt addr 0x%x size %d\n", + __func__, srq->idx, wq->qid, srq->pdid, wq->queue, + (u64)virt_to_phys(wq->queue), wq->bar2_va, + wq->rqt_hwaddr, wq->rqt_size); + + return 0; +err_free_queue: + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); +err_free_rqtpool: + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); +err_free_pending_wrs: + if (!user) + kfree(wq->pending_wrs); +err_free_sw_rq: + if (!user) + kfree(wq->sw_rq); +err_put_qpid: + c4iw_put_qpid(rdev, wq->qid, uctx); +err: + return ret; +} + +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) +{ + u64 *src, *dst; + + src = (u64 *)wqe; + dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); + while (len16) { + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + len16--; + } +} + +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_pd *php; + struct c4iw_create_srq_resp uresp; + struct c4iw_ucontext *ucontext; + struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm; + int rqsize; + int ret; + int wr_len; + + pr_debug("%s ib_pd %p\n", __func__, pd); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + + if (!rhp->rdev.lldi.vr->srq.size) + return ERR_PTR(-EINVAL); + if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + if (attrs->attr.max_sge > T4_MAX_RECV_SGE) + return ERR_PTR(-E2BIG); + + /* + * SRQ RQT and RQ must be a power of 2 and at least 16 deep. + */ + rqsize = attrs->attr.max_wr + 1; + rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); + + ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!srq->wr_waitp) { + ret = -ENOMEM; + goto err_free_srq; + } + + srq->idx = c4iw_alloc_srq_idx(&rhp->rdev); + if (srq->idx < 0) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!srq->destroy_skb) { + ret = -ENOMEM; + goto err_free_srq_idx; + } + + srq->rhp = rhp; + srq->pdid = php->pdid; + + srq->wq.size = rqsize; + srq->wq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*srq->wq.queue); + if (ucontext) + srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE); + + ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx : + &rhp->rdev.uctx, srq->wr_waitp); + if (ret) + goto err_free_skb; + attrs->attr.max_wr = rqsize - 1; + + if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) + srq->flags = T4_SRQ_LIMIT_SUPPORT; + + ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid); + if (ret) + goto err_free_queue; + + if (udata) { + srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); + if (!srq_key_mm) { + ret = -ENOMEM; + goto err_remove_handle; + } + srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); + if (!srq_db_key_mm) { + ret = -ENOMEM; + goto err_free_srq_key_mm; + } + memset(&uresp, 0, sizeof(uresp)); + uresp.flags = srq->flags; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.srqid = srq->wq.qid; + uresp.srq_size = srq->wq.size; + uresp.srq_memsize = srq->wq.memsize; + uresp.rqt_abs_idx = srq->wq.rqt_abs_idx; + spin_lock(&ucontext->mmap_lock); + uresp.srq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.srq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_free_srq_db_key_mm; + srq_key_mm->key = uresp.srq_key; + srq_key_mm->addr = virt_to_phys(srq->wq.queue); + srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize); + insert_mmap(ucontext, srq_key_mm); + srq_db_key_mm->key = uresp.srq_db_gts_key; + srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa; + srq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, srq_db_key_mm); + } + + pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n", + __func__, srq->wq.qid, srq->idx, srq->wq.size, + (unsigned long)srq->wq.memsize, attrs->attr.max_wr); + + spin_lock_init(&srq->lock); + return &srq->ibsrq; +err_free_srq_db_key_mm: + kfree(srq_db_key_mm); +err_free_srq_key_mm: + kfree(srq_key_mm); +err_remove_handle: + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); +err_free_queue: + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); +err_free_skb: + if (srq->destroy_skb) + kfree_skb(srq->destroy_skb); +err_free_srq_idx: + c4iw_free_srq_idx(&rhp->rdev, srq->idx); +err_free_wr_wait: + c4iw_put_wr_wait(srq->wr_waitp); +err_free_srq: + kfree(srq); + return ERR_PTR(ret); +} + +int c4iw_destroy_srq(struct ib_srq *ibsrq) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_ucontext *ucontext; + + srq = to_c4iw_srq(ibsrq); + rhp = srq->rhp; + + pr_debug("%s id %d\n", __func__, srq->wq.qid); + + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); + ucontext = ibsrq->uobject ? + to_c4iw_ucontext(ibsrq->uobject->context) : NULL; + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); + c4iw_free_srq_idx(&rhp->rdev, srq->idx); + c4iw_put_wr_wait(srq->wr_waitp); + kfree(srq); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 0ef25ae05e6f..57ed26b3cc21 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev) } /* nr_* must be power of 2 */ -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt) { int err = 0; err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, @@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) nr_pdid, 1, 0); if (err) goto pdid_err; + if (!nr_srqt) + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + 1, 1, 0); + else + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + nr_srqt, 0, 0); + if (err) + goto srq_err; return 0; + srq_err: + c4iw_id_table_free(&rdev->resource.pdid_table); pdid_err: c4iw_id_table_free(&rdev->resource.qid_table); qid_err: @@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) int c4iw_rqtpool_create(struct c4iw_rdev *rdev) { unsigned rqt_start, rqt_chunk, rqt_top; + int skip = 0; rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); if (!rdev->rqt_pool) return -ENOMEM; - rqt_start = rdev->lldi.vr->rq.start; - rqt_chunk = rdev->lldi.vr->rq.size; + /* + * If SRQs are supported, then never use the first RQE from + * the RQT region. This is because HW uses RQT index 0 as NULL. + */ + if (rdev->lldi.vr->srq.size) + skip = T4_RQT_ENTRY_SIZE; + + rqt_start = rdev->lldi.vr->rq.start + skip; + rqt_chunk = rdev->lldi.vr->rq.size - skip; rqt_top = rqt_start + rqt_chunk; while (rqt_start < rqt_top) { @@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) kref_put(&rdev->rqt_kref, destroy_rqtpool); } +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev) +{ + int idx; + + idx = c4iw_id_alloc(&rdev->resource.srq_table); + mutex_lock(&rdev->stats.lock); + if (idx == -1) { + rdev->stats.srqt.fail++; + mutex_unlock(&rdev->stats.lock); + return -ENOMEM; + } + rdev->stats.srqt.cur++; + if (rdev->stats.srqt.cur > rdev->stats.srqt.max) + rdev->stats.srqt.max = rdev->stats.srqt.cur; + mutex_unlock(&rdev->stats.lock); + return idx; +} + +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx) +{ + c4iw_id_free(&rdev->resource.srq_table, idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.srqt.cur--; + mutex_unlock(&rdev->stats.lock); +} + /* * On-Chip QP Memory. */ diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 8369c7c8de83..e42021fd6fd6 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -52,12 +52,16 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; - u8 pad; + u8 pad[2]; u16 host_wq_pidx; u16 host_cidx; u16 host_pidx; + u16 pad2; + u32 srqidx; }; +#define T4_RQT_ENTRY_SHIFT 6 +#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT) #define T4_EQ_ENTRY_SIZE 64 #define T4_SQ_NUM_SLOTS 5 @@ -87,6 +91,9 @@ static inline int t4_max_fr_depth(int use_dsgl) #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) #define T4_MAX_RECV_SGE 4 +#define T4_WRITE_CMPL_MAX_SGL 4 +#define T4_WRITE_CMPL_MAX_CQE 16 + union t4_wr { struct fw_ri_res_wr res; struct fw_ri_wr ri; @@ -97,6 +104,7 @@ union t4_wr { struct fw_ri_fr_nsmr_wr fr; struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; + struct fw_ri_rdma_write_cmpl_wr write_cmpl; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; }; @@ -179,9 +187,32 @@ struct t4_cqe { __be32 wrid_hi; __be32 wrid_low; } gen; + struct { + __be32 stag; + __be32 msn; + __be32 reserved; + __be32 abs_rqe_idx; + } srcqe; + struct { + __be32 mo; + __be32 msn; + /* + * Use union for immediate data to be consistent with + * stack's 32 bit data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; + } imm_data_rcqe; + u64 drain_cookie; + __be64 flits[3]; } u; - __be64 reserved; + __be64 reserved[3]; __be64 bits_type_ts; }; @@ -237,6 +268,9 @@ struct t4_cqe { /* used for RQ completion processing */ #define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) #define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) +#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) +#define CQE_IMM_DATA(x)( \ + (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32) /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) @@ -320,6 +354,7 @@ struct t4_swrqe { u64 wr_id; ktime_t host_time; u64 sge_ts; + int valid; }; struct t4_rq { @@ -349,8 +384,98 @@ struct t4_wq { void __iomem *db; struct c4iw_rdev *rdev; int flushed; + u8 *qp_errp; + u32 *srqidxp; +}; + +struct t4_srq_pending_wr { + u64 wr_id; + union t4_recv_wr wqe; + u8 len16; +}; + +struct t4_srq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DECLARE_PCI_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u32 rqt_abs_idx; + u16 rqt_size; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 in_use; + struct t4_srq_pending_wr *pending_wrs; + u16 pending_cidx; + u16 pending_pidx; + u16 pending_in_use; + u16 ooo_count; }; +static inline u32 t4_srq_avail(struct t4_srq *srq) +{ + return srq->size - 1 - srq->in_use; +} + +static inline void t4_srq_produce(struct t4_srq *srq, u8 len16) +{ + srq->in_use++; + if (++srq->pidx == srq->size) + srq->pidx = 0; + srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS) + srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS; + srq->queue[srq->size].status.host_pidx = srq->pidx; +} + +static inline void t4_srq_produce_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use++; + srq->in_use++; + if (++srq->pending_pidx == srq->size) + srq->pending_pidx = 0; +} + +static inline void t4_srq_consume_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use--; + srq->in_use--; + if (++srq->pending_cidx == srq->size) + srq->pending_cidx = 0; +} + +static inline void t4_srq_produce_ooo(struct t4_srq *srq) +{ + srq->in_use--; + srq->ooo_count++; +} + +static inline void t4_srq_consume_ooo(struct t4_srq *srq) +{ + srq->cidx++; + if (srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; + srq->ooo_count--; +} + +static inline void t4_srq_consume(struct t4_srq *srq) +{ + srq->in_use--; + if (++srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; +} + static inline int t4_rqes_posted(struct t4_wq *wq) { return wq->rq.in_use; @@ -384,7 +509,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) static inline void t4_rq_consume(struct t4_wq *wq) { wq->rq.in_use--; - wq->rq.msn++; if (++wq->rq.cidx == wq->rq.size) wq->rq.cidx = 0; } @@ -464,6 +588,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src) } } +static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16, + union t4_recv_wr *wqe) +{ + /* Flush host queue memory writes. */ + wmb(); + if (inc == 1 && srq->bar2_qid == 0 && wqe) { + pr_debug("%s : WC srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe); + } else { + pr_debug("%s: DB srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid), + srq->bar2_va + SGE_UDB_KDOORBELL); + } + /* Flush user doorbell area writes. */ + wmb(); +} + static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) { @@ -515,12 +658,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, static inline int t4_wq_in_error(struct t4_wq *wq) { - return wq->rq.queue[wq->rq.size].status.qp_err; + return *wq->qp_errp; } -static inline void t4_set_wq_in_error(struct t4_wq *wq) +static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx) { - wq->rq.queue[wq->rq.size].status.qp_err = 1; + if (srqidx) + *wq->srqidxp = srqidx; + *wq->qp_errp = 1; } static inline void t4_disable_wq_db(struct t4_wq *wq) @@ -565,6 +710,7 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + u8 *qp_errp; unsigned long flags; }; @@ -698,18 +844,18 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline int t4_cq_in_error(struct t4_cq *cq) { - return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err; + return *cq->qp_errp; } static inline void t4_set_cq_in_error(struct t4_cq *cq) { - ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; + *cq->qp_errp = 1; } #endif struct t4_dev_status_page { u8 db_off; - u8 pad1; + u8 write_cmpl_supported; u16 pad2; u32 pad3; u64 qp_start; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 58c531db4f4a..cbdb300a4794 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -50,7 +50,8 @@ enum fw_ri_wr_opcode { FW_RI_BYPASS = 0xd, FW_RI_RECEIVE = 0xe, - FW_RI_SGE_EC_CR_RETURN = 0xf + FW_RI_SGE_EC_CR_RETURN = 0xf, + FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT }; enum fw_ri_wr_flags { @@ -59,7 +60,8 @@ enum fw_ri_wr_flags { FW_RI_SOLICITED_EVENT_FLAG = 0x04, FW_RI_READ_FENCE_FLAG = 0x08, FW_RI_LOCAL_FENCE_FLAG = 0x10, - FW_RI_RDMA_READ_INVALIDATE = 0x20 + FW_RI_RDMA_READ_INVALIDATE = 0x20, + FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40 }; enum fw_ri_mpa_attrs { @@ -263,6 +265,7 @@ enum fw_ri_res_type { FW_RI_RES_TYPE_SQ, FW_RI_RES_TYPE_RQ, FW_RI_RES_TYPE_CQ, + FW_RI_RES_TYPE_SRQ, }; enum fw_ri_res_op { @@ -296,6 +299,20 @@ struct fw_ri_res { __be32 r6_lo; __be64 r7; } cq; + struct fw_ri_res_srq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + __be32 srqid; + __be32 pdid; + __be32 hwsrqsize; + __be32 hwsrqaddr; + } srq; } u; }; @@ -531,7 +548,17 @@ struct fw_ri_rdma_write_wr { __u16 wrid; __u8 r1[3]; __u8 len16; - __be64 r2; + /* + * Use union for immediate data to be consistent with stack's 32 bit + * data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; __be32 plen; __be32 stag_sink; __be64 to_sink; @@ -568,6 +595,37 @@ struct fw_ri_send_wr { #define FW_RI_SEND_WR_SENDOP_G(x) \ (((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M) +struct fw_ri_rdma_write_cmpl_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 r2; + __u8 flags_send; + __u16 wrid_send; + __be32 stag_inv; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; + union fw_ri_cmpl { + struct fw_ri_immd_cmpl { + __u8 op; + __u8 r1[6]; + __u8 immdlen; + __u8 data[16]; + } immd_src; + struct fw_ri_isgl isgl_src; + } u_cmpl; + __be64 r3; +#ifndef C99_NOT_SUPPORTED + union fw_ri_write { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + struct fw_ri_rdma_read_wr { __u8 opcode; __u8 flags; @@ -707,6 +765,10 @@ enum fw_ri_init_p2ptype { FW_RI_INIT_P2PTYPE_DISABLED = 0xf, }; +enum fw_ri_init_rqeqid_srq { + FW_RI_INIT_RQEQID_SRQ = 1 << 31, +}; + struct fw_ri_wr { __be32 op_compl; __be32 flowid_len16; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 6deb101cdd43..2c19bf772451 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8143,8 +8143,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source) } } -/* +/** + * is_rcv_avail_int() - User receive context available IRQ handler + * @dd: valid dd + * @source: logical IRQ source (offset from IS_RCVAVAIL_START) + * * RX block receive available interrupt. Source is < 160. + * + * This is the general interrupt handler for user (PSM) receive contexts, + * and can only be used for non-threaded IRQs. */ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8154,12 +8161,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* Check for non-user contexts, including vnic */ - if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic) - rcd->do_interrupt(rcd, 0); - else - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } @@ -8173,8 +8175,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) err_detail, source); } -/* +/** + * is_rcv_urgent_int() - User receive context urgent IRQ handler + * @dd: valid dd + * @source: logical IRQ source (ofse from IS_RCVURGENT_START) + * * RX block receive urgent interrupt. Source is < 160. + * + * NOTE: kernel receive contexts specifically do NOT enable this IRQ. */ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) { @@ -8184,11 +8192,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { - /* only pay attention to user urgent interrupts */ - if (source >= dd->first_dyn_alloc_ctxt && - !rcd->is_vnic) - handle_user_interrupt(rcd); - + handle_user_interrupt(rcd); hfi1_rcd_put(rcd); return; /* OK */ } @@ -8260,9 +8264,14 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source) dd_dev_err(dd, "invalid interrupt source %u\n", source); } -/* - * General interrupt handler. This is able to correctly handle - * all interrupts in case INTx is used. +/** + * gerneral_interrupt() - General interrupt handler + * @irq: MSIx IRQ vector + * @data: hfi1 devdata + * + * This is able to correctly handle all non-threaded interrupts. Receive + * context DATA IRQs are threaded and are not supported by this handler. + * */ static irqreturn_t general_interrupt(int irq, void *data) { @@ -10130,7 +10139,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << SEND_CTXT_CHECK_SLID_VALUE_SHIFT); - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x", i, (u32)sreg); write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg); @@ -11857,7 +11866,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, * sequence numbers could land exactly on the same spot. * E.g. a rcd restart before the receive header wrapped. */ - memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); + memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd)); /* starting timeout */ rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr; @@ -11952,9 +11961,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; - rcd->rcvctrl = rcvctrl; hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); - write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl); + write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); /* work around sticky RcvCtxtStatus.BlockedRHQFull */ if (did_enable && @@ -12042,7 +12050,7 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) } else if (entry->flags & CNTR_SDMA) { hfi1_cdbg(CNTR, "\t Per SDMA Engine\n"); - for (j = 0; j < dd->chip_sdma_engines; + for (j = 0; j < chip_sdma_engines(dd); j++) { val = entry->rw_cntr(entry, dd, j, @@ -12418,6 +12426,7 @@ static int init_cntrs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; const char *bit_type_32 = ",32"; const int bit_type_32_sz = strlen(bit_type_32); + u32 sdma_engines = chip_sdma_engines(dd); /* set up the stats timer; the add_timer is done at the end */ timer_setup(&dd->synth_stats_timer, update_synth_timer, 0); @@ -12450,7 +12459,7 @@ static int init_cntrs(struct hfi1_devdata *dd) } } else if (dev_cntrs[i].flags & CNTR_SDMA) { dev_cntrs[i].offset = dd->ndevcntrs; - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); sz += strlen(name); @@ -12507,7 +12516,7 @@ static int init_cntrs(struct hfi1_devdata *dd) *p++ = '\n'; } } else if (dev_cntrs[i].flags & CNTR_SDMA) { - for (j = 0; j < dd->chip_sdma_engines; j++) { + for (j = 0; j < sdma_engines; j++) { snprintf(name, C_MAX_NAME, "%s%d", dev_cntrs[i].name, j); memcpy(p, name, strlen(name)); @@ -13020,9 +13029,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0); write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0); write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0); @@ -13030,48 +13039,30 @@ static void clear_all_interrupts(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); } -/* Move to pcie.c? */ -static void disable_intx(struct pci_dev *pdev) -{ - pci_intx(pdev, 0); -} - /** * hfi1_clean_up_interrupts() - Free all IRQ resources * @dd: valid device data data structure * - * Free the MSI or INTx IRQs and assoicated PCI resources, - * if they have been allocated. + * Free the MSIx and assoicated PCI resources, if they have been allocated. */ void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) { int i; + struct hfi1_msix_entry *me = dd->msix_entries; /* remove irqs - must happen before disabling/turning off */ - if (dd->num_msix_entries) { - /* MSI-X */ - struct hfi1_msix_entry *me = dd->msix_entries; - - for (i = 0; i < dd->num_msix_entries; i++, me++) { - if (!me->arg) /* => no irq, no affinity */ - continue; - hfi1_put_irq_affinity(dd, me); - pci_free_irq(dd->pcidev, i, me->arg); - } - - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; - } else { - /* INTx */ - if (dd->requested_intx_irq) { - pci_free_irq(dd->pcidev, 0, dd); - dd->requested_intx_irq = 0; - } - disable_intx(dd->pcidev); + for (i = 0; i < dd->num_msix_entries; i++, me++) { + if (!me->arg) /* => no irq, no affinity */ + continue; + hfi1_put_irq_affinity(dd, me); + pci_free_irq(dd->pcidev, i, me->arg); } + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } @@ -13121,20 +13112,6 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd, msix_intr); } -static int request_intx_irq(struct hfi1_devdata *dd) -{ - int ret; - - ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd, - DRIVER_NAME "_%d", dd->unit); - if (ret) - dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", - ret); - else - dd->requested_intx_irq = 1; - return ret; -} - static int request_msix_irqs(struct hfi1_devdata *dd) { int first_general, last_general; @@ -13253,11 +13230,6 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) { int i; - if (!dd->num_msix_entries) { - synchronize_irq(pci_irq_vector(dd->pcidev, 0)); - return; - } - for (i = 0; i < dd->vnic.num_ctxt; i++) { struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; @@ -13346,7 +13318,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) { u32 total; int ret, request; - int single_interrupt = 0; /* we expect to have all the interrupts */ /* * Interrupt count: @@ -13363,17 +13334,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd) if (request < 0) { ret = request; goto fail; - } else if (request == 0) { - /* using INTx */ - /* dd->num_msix_entries already zero */ - single_interrupt = 1; - dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); - } else if (request < total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", - total, request); - ret = -EINVAL; - goto fail; } else { dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), GFP_KERNEL); @@ -13394,10 +13354,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) /* reset general handler mask, chip MSI-X mappings */ reset_interrupts(dd); - if (single_interrupt) - ret = request_intx_irq(dd); - else - ret = request_msix_irqs(dd); + ret = request_msix_irqs(dd); if (ret) goto fail; @@ -13429,6 +13386,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int qos_rmt_count; int user_rmt_reduced; u32 n_usr_ctxts; + u32 send_contexts = chip_send_contexts(dd); + u32 rcv_contexts = chip_rcv_contexts(dd); /* * Kernel receive contexts: @@ -13450,16 +13409,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * Every kernel receive context needs an ACK send context. * one send context is allocated for each VL{0-7} and VL15 */ - if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { + if (num_kernel_contexts > (send_contexts - num_vls - 1)) { dd_dev_err(dd, "Reducing # kernel rcv contexts to: %d, from %lu\n", - (int)(dd->chip_send_contexts - num_vls - 1), + send_contexts - num_vls - 1, num_kernel_contexts); - num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; + num_kernel_contexts = send_contexts - num_vls - 1; } /* Accommodate VNIC contexts if possible */ - if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) { + if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) { dd_dev_err(dd, "No receive contexts available for VNIC\n"); num_vnic_contexts = 0; } @@ -13477,13 +13436,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd) /* * Adjust the counts given a global max. */ - if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) { + if (total_contexts + n_usr_ctxts > rcv_contexts) { dd_dev_err(dd, "Reducing # user receive contexts to: %d, from %u\n", - (int)(dd->chip_rcv_contexts - total_contexts), + rcv_contexts - total_contexts, n_usr_ctxts); /* recalculate */ - n_usr_ctxts = dd->chip_rcv_contexts - total_contexts; + n_usr_ctxts = rcv_contexts - total_contexts; } /* each user context requires an entry in the RMT */ @@ -13509,7 +13468,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd->freectxts = n_usr_ctxts; dd_dev_info(dd, "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", - (int)dd->chip_rcv_contexts, + rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, dd->num_vnic_contexts, @@ -13527,7 +13486,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) * contexts. */ dd->rcv_entries.group_size = RCV_INCREMENT; - ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size; + ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size; dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts; dd->rcv_entries.nctxt_extra = ngroups - (dd->num_rcv_contexts * dd->rcv_entries.ngroups); @@ -13552,7 +13511,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) dd_dev_info( dd, "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n", - dd->chip_send_contexts, + send_contexts, dd->num_send_contexts, dd->sc_sizes[SC_KERNEL].count, dd->sc_sizes[SC_ACK].count, @@ -13610,7 +13569,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) write_csr(dd, CCE_INT_MAP + (8 * i), 0); /* SendCtxtCreditReturnAddr */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); /* PIO Send buffers */ @@ -13623,7 +13582,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvHdrAddr */ /* RcvHdrTailAddr */ /* RcvTidFlowTable */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0); write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0); for (j = 0; j < RXE_NUM_TID_FLOWS; j++) @@ -13631,7 +13590,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) } /* RcvArray */ - for (i = 0; i < dd->chip_rcv_array_count; i++) + for (i = 0; i < chip_rcv_array_count(dd); i++) hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); /* RcvQPMapTable */ @@ -13789,7 +13748,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0); for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++) write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0); - for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++) + for (i = 0; i < chip_send_contexts(dd) / NUM_CONTEXTS_PER_SET; i++) write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0); for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++) write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0); @@ -13817,7 +13776,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-Context CSRs */ - for (i = 0; i < dd->chip_send_contexts; i++) { + for (i = 0; i < chip_send_contexts(dd); i++) { write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0); write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); @@ -13835,7 +13794,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd) /* * TXE Per-SDMA CSRs */ - for (i = 0; i < dd->chip_sdma_engines; i++) { + for (i = 0; i < chip_sdma_engines(dd); i++) { write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* SEND_DMA_STATUS read-only */ write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0); @@ -13968,7 +13927,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) /* * RXE Kernel and User Per-Context CSRs */ - for (i = 0; i < dd->chip_rcv_contexts; i++) { + for (i = 0; i < chip_rcv_contexts(dd); i++) { /* kernel */ write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0); /* RCV_CTXT_STATUS read-only */ @@ -14084,13 +14043,13 @@ static int init_chip(struct hfi1_devdata *dd) /* disable send contexts and SDMA engines */ write_csr(dd, SEND_CTRL, 0); - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); /* disable port (turn off RXE inbound traffic) and contexts */ write_csr(dd, RCV_CTRL, 0); - for (i = 0; i < dd->chip_rcv_contexts; i++) + for (i = 0; i < chip_rcv_contexts(dd); i++) write_csr(dd, RCV_CTXT_CTRL, 0); /* mask all interrupt sources */ for (i = 0; i < CCE_NUM_INT_CSRS; i++) @@ -14709,9 +14668,9 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull); /* enable all per-context and per-SDMA engine errors */ - for (i = 0; i < dd->chip_send_contexts; i++) + for (i = 0; i < chip_send_contexts(dd); i++) write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull); - for (i = 0; i < dd->chip_sdma_engines; i++) + for (i = 0; i < chip_sdma_engines(dd); i++) write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull); /* set the local CU to AU mapping */ @@ -14979,11 +14938,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, "Functional simulator" }; struct pci_dev *parent = pdev->bus->self; + u32 sdma_engines; dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * sizeof(struct hfi1_pportdata)); if (IS_ERR(dd)) goto bail; + sdma_engines = chip_sdma_engines(dd); ppd = dd->pport; for (i = 0; i < dd->num_pports; i++, ppd++) { int vl; @@ -15081,11 +15042,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* give a reasonable active value, will be set on link up */ dd->pport->link_speed_active = OPA_LINK_SPEED_25G; - dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS); - dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS); - dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES); - dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE); - dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE); /* fix up link widths for emulation _p */ ppd = dd->pport; if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) { @@ -15096,11 +15052,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, OPA_LINK_WIDTH_1X; } /* insure num_vls isn't larger than number of sdma engines */ - if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) { + if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) { dd_dev_err(dd, "num_vls %u too large, using %u VLs\n", - num_vls, dd->chip_sdma_engines); - num_vls = dd->chip_sdma_engines; - ppd->vls_supported = dd->chip_sdma_engines; + num_vls, sdma_engines); + num_vls = sdma_engines; + ppd->vls_supported = sdma_engines; ppd->vls_operational = ppd->vls_supported; } @@ -15216,13 +15172,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, */ aspm_init(dd); - dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; - /* - * rcd[0] is guaranteed to be valid by this point. Also, all - * context are using the same value, as per the module parameter. - */ - dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32); - ret = init_pervl_scs(dd); if (ret) goto bail_cleanup; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index fdf389e46e19..36b04d6300e5 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -656,6 +656,36 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, write_csr(dd, offset0 + (0x1000 * ctxt), value); } +static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_CONTEXTS); +} + +static inline u32 chip_send_contexts(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_CONTEXTS); +} + +static inline u32 chip_sdma_engines(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_ENGINES); +} + +static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_PIO_MEM_SIZE); +} + +static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd) +{ + return read_csr(dd, SEND_DMA_MEM_SIZE); +} + +static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd) +{ + return read_csr(dd, RCV_ARRAY_CNT); +} + u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, u32 dw_len); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 94dca95db04f..a41f85558312 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -208,25 +208,25 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } -static inline void *hfi1_get_header(struct hfi1_devdata *dd, +static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - return (void *)(rhf_addr - dd->rhf_offset + offset); + return (void *)(rhf_addr - rcd->rhf_offset + offset); } -static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct ib_header *)hfi1_get_header(dd, rhf_addr); + return (struct ib_header *)hfi1_get_header(rcd, rhf_addr); } static inline struct hfi1_16b_header - *hfi1_get_16B_header(struct hfi1_devdata *dd, + *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd, __le32 *rhf_addr) { - return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr); + return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr); } /* @@ -591,13 +591,12 @@ static void __prescan_rxq(struct hfi1_packet *packet) init_ps_mdata(&mdata, packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + packet->rcd->rhf_offset; struct rvt_qp *qp; struct ib_header *hdr; - struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; int is_ecn = 0; @@ -612,7 +611,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - packet->hdr = hfi1_get_msgheader(dd, rhf_addr); + packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr); hdr = packet->hdr; lnh = ib_get_lnh(hdr); @@ -718,7 +717,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -757,7 +756,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) * crashing down. There is no need to eat another * comparison in this performance critical code. */ - packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet); + packet->rcd->rhf_rcv_function_map[packet->etype](packet); packet->numpkt++; /* Set up for the next packet */ @@ -768,7 +767,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) ret = check_max_packet(packet, thread); packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + - packet->rcd->dd->rhf_offset; + packet->rcd->rhf_offset; packet->rhf = rhf_to_cpu(packet->rhf_addr); return ret; @@ -949,12 +948,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, u8 sc = SC15_PACKET; if (etype == RHF_RCV_TYPE_IB) { - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd, packet->rhf_addr); sc = hfi1_9B_get_sc5(hdr, packet->rhf); } else if (etype == RHF_RCV_TYPE_BYPASS) { struct hfi1_16b_header *hdr = hfi1_get_16B_header( - packet->rcd->dd, + packet->rcd, packet->rhf_addr); sc = hfi1_16B_get_sc(hdr); } @@ -1034,7 +1033,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) packet.rhqoff += packet.rsize; packet.rhf_addr = (__le32 *)rcd->rcvhdrq + packet.rhqoff + - dd->rhf_offset; + rcd->rhf_offset; packet.rhf = rhf_to_cpu(packet.rhf_addr); } else if (skip_pkt) { @@ -1384,7 +1383,7 @@ bail: static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) { packet->hdr = (struct hfi1_ib_message_header *) - hfi1_get_msgheader(packet->rcd->dd, + hfi1_get_msgheader(packet->rcd, packet->rhf_addr); packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; } @@ -1485,7 +1484,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) u8 l4; packet->hdr = (struct hfi1_16b_header *) - hfi1_get_16B_header(packet->rcd->dd, + hfi1_get_16B_header(packet->rcd, packet->rhf_addr); l4 = hfi1_16B_get_l4(packet->hdr); if (l4 == OPA_16B_L4_IB_LOCAL) { @@ -1575,7 +1574,7 @@ void handle_eflags(struct hfi1_packet *packet) * The following functions are called by the interrupt handler. They are type * specific handlers for each packet type. */ -int process_receive_ib(struct hfi1_packet *packet) +static int process_receive_ib(struct hfi1_packet *packet) { if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; @@ -1607,7 +1606,7 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) return false; } -int process_receive_bypass(struct hfi1_packet *packet) +static int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; @@ -1649,7 +1648,7 @@ int process_receive_bypass(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_error(struct hfi1_packet *packet) +static int process_receive_error(struct hfi1_packet *packet) { /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( @@ -1668,7 +1667,7 @@ int process_receive_error(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_expected(struct hfi1_packet *packet) +static int kdeth_process_expected(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1682,7 +1681,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int kdeth_process_eager(struct hfi1_packet *packet) +static int kdeth_process_eager(struct hfi1_packet *packet) { hfi1_setup_9B_packet(packet); if (unlikely(hfi1_dbg_should_fault_rx(packet))) @@ -1695,7 +1694,7 @@ int kdeth_process_eager(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } -int process_receive_invalid(struct hfi1_packet *packet) +static int process_receive_invalid(struct hfi1_packet *packet) { dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n", rhf_rcv_type(packet->rhf)); @@ -1719,9 +1718,8 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) init_ps_mdata(&mdata, &packet); while (1) { - struct hfi1_devdata *dd = rcd->dd; __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + - dd->rhf_offset; + rcd->rhf_offset; struct ib_header *hdr; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn; @@ -1738,7 +1736,7 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) if (etype > RHF_RCV_TYPE_IB) goto next; - packet.hdr = hfi1_get_msgheader(dd, rhf_addr); + packet.hdr = hfi1_get_msgheader(rcd, rhf_addr); hdr = packet.hdr; lnh = be16_to_cpu(hdr->lrh[0]) & 3; @@ -1760,3 +1758,14 @@ next: update_ps_mdata(&mdata, rcd); } } + +const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = { + [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected, + [RHF_RCV_TYPE_EAGER] = kdeth_process_eager, + [RHF_RCV_TYPE_IB] = process_receive_ib, + [RHF_RCV_TYPE_ERROR] = process_receive_error, + [RHF_RCV_TYPE_BYPASS] = process_receive_bypass, + [RHF_RCV_TYPE_INVALID5] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID6] = process_receive_invalid, + [RHF_RCV_TYPE_INVALID7] = process_receive_invalid, +}; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 0fc4aa9455c3..1fc75647e47b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -411,7 +411,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memlen = uctxt->rcvhdrq_size; + memlen = rcvhdrq_size(uctxt); memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { @@ -521,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) break; case SUBCTXT_RCV_HDRQ: memaddr = (u64)uctxt->subctxt_rcvhdr_base; - memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt; + memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt; flags |= VM_IO | VM_DONTEXPAND; vmf = 1; break; @@ -985,7 +985,11 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, * sub contexts. * This has to be done here so the rest of the sub-contexts find the * proper base context. + * NOTE: _set_bit() can be used here because the context creation is + * protected by the mutex (rather than the spin_lock), and will be the + * very first instance of this context. */ + __set_bit(0, uctxt->in_use_ctxts); if (uinfo->subctxt_cnt) init_subctxts(uctxt, uinfo); uctxt->userversion = uinfo->userversion; @@ -1040,7 +1044,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) return -ENOMEM; /* We can take the size of the RcvHdr Queue from the master */ - uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * + uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) * num_subctxts); if (!uctxt->subctxt_rcvhdr_base) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4ab8b5bfbed1..d9470317983f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -169,12 +169,6 @@ extern const struct pci_error_handlers hfi1_pci_err_handler; struct hfi1_opcode_stats_perctx; struct ctxt_eager_bufs { - ssize_t size; /* total size of eager buffers */ - u32 count; /* size of buffers array */ - u32 numbufs; /* number of buffers allocated */ - u32 alloced; /* number of rcvarray entries used */ - u32 rcvtid_size; /* size of each eager rcv tid */ - u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; dma_addr_t dma; @@ -184,6 +178,12 @@ struct ctxt_eager_bufs { void *addr; dma_addr_t dma; } *rcvtids; + u32 size; /* total size of eager buffers */ + u32 rcvtid_size; /* size of each eager rcv tid */ + u16 count; /* size of buffers array */ + u16 numbufs; /* number of buffers allocated */ + u16 alloced; /* number of rcvarray entries used */ + u16 threshold; /* head update threshold */ }; struct exp_tid_set { @@ -191,43 +191,84 @@ struct exp_tid_set { u32 count; }; +typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); struct hfi1_ctxtdata { - /* shadow the ctxt's RcvCtrl register */ - u64 rcvctrl; /* rcvhdrq base, needs mmap before useful */ void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ volatile __le64 *rcvhdrtail_kvaddr; - /* when waiting for rcv or pioavail */ - wait_queue_head_t wait; - /* rcvhdrq size (for freeing) */ - size_t rcvhdrq_size; + /* so functions that need physical port can get it easily */ + struct hfi1_pportdata *ppd; + /* so file ops can get at unit */ + struct hfi1_devdata *dd; + /* this receive context's assigned PIO ACK send context */ + struct send_context *sc; + /* per context recv functions */ + const rhf_rcv_function_ptr *rhf_rcv_function_map; + /* + * The interrupt handler for a particular receive context can vary + * throughout it's lifetime. This is not a lock protected data member so + * it must be updated atomically and the prev and new value must always + * be valid. Worst case is we process an extra interrupt and up to 64 + * packets with the wrong interrupt handler. + */ + int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + /* verbs rx_stats per rcd */ + struct hfi1_opcode_stats_perctx *opstats; + /* clear interrupt mask */ + u64 imask; + /* ctxt rcvhdrq head offset */ + u32 head; /* number of rcvhdrq entries */ u16 rcvhdrq_cnt; + u8 ireg; /* clear interrupt register */ + /* receive packet sequence counter */ + u8 seq_cnt; /* size of each of the rcvhdrq entries */ - u16 rcvhdrqentsize; + u8 rcvhdrqentsize; + /* offset of RHF within receive header entry */ + u8 rhf_offset; + /* dynamic receive available interrupt timeout */ + u8 rcvavail_timeout; + /* Indicates that this is vnic context */ + bool is_vnic; + /* vnic queue index this context is mapped to */ + u8 vnic_q_idx; + /* Is ASPM interrupt supported for this context */ + bool aspm_intr_supported; + /* ASPM state (enabled/disabled) for this context */ + bool aspm_enabled; + /* Is ASPM processing enabled for this context (in intr context) */ + bool aspm_intr_enable; + struct ctxt_eager_bufs egrbufs; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; + /* tid allocation lists */ + struct exp_tid_set tid_group_list; + struct exp_tid_set tid_used_list; + struct exp_tid_set tid_full_list; + + /* Timer for re-enabling ASPM if interrupt activity quiets down */ + struct timer_list aspm_timer; + /* per-context configuration flags */ + unsigned long flags; + /* array of tid_groups */ + struct tid_group *groups; /* mmap of hdrq, must fit in 44 bits */ dma_addr_t rcvhdrq_dma; dma_addr_t rcvhdrqtailaddr_dma; - struct ctxt_eager_bufs egrbufs; - /* this receive context's assigned PIO ACK send context */ - struct send_context *sc; - - /* dynamic receive available interrupt timeout */ - u32 rcvavail_timeout; + /* Last interrupt timestamp */ + ktime_t aspm_ts_last_intr; + /* Last timestamp at which we scheduled a timer for this context */ + ktime_t aspm_ts_timer_sched; + /* Lock to serialize between intr, timer intr and user threads */ + spinlock_t aspm_lock; /* Reference count the base context usage */ struct kref kref; - - /* Device context index */ - u16 ctxt; - /* - * non-zero if ctxt can be shared, and defines the maximum number of - * sub-contexts for this device context. - */ - u16 subctxt_cnt; - /* non-zero if ctxt is being shared. */ - u16 subctxt_id; - u8 uuid[16]; + /* numa node of this context */ + int numa_id; + /* associated msix interrupt. */ + s16 msix_intr; /* job key */ u16 jkey; /* number of RcvArray groups for this context. */ @@ -238,87 +279,59 @@ struct hfi1_ctxtdata { u16 expected_count; /* index of first expected TID entry. */ u16 expected_base; - /* array of tid_groups */ - struct tid_group *groups; - - struct exp_tid_set tid_group_list; - struct exp_tid_set tid_used_list; - struct exp_tid_set tid_full_list; + /* Device context index */ + u8 ctxt; - /* lock protecting all Expected TID data of user contexts */ + /* PSM Specific fields */ + /* lock protecting all Expected TID data */ struct mutex exp_mutex; - /* per-context configuration flags */ - unsigned long flags; - /* per-context event flags for fileops/intr communication */ - unsigned long event_flags; - /* total number of polled urgent packets */ - u32 urgent; - /* saved total number of polled urgent packets for poll edge trigger */ - u32 urgent_poll; + /* when waiting for rcv or pioavail */ + wait_queue_head_t wait; + /* uuid from PSM */ + u8 uuid[16]; /* same size as task_struct .comm[], command that opened context */ char comm[TASK_COMM_LEN]; - /* so file ops can get at unit */ - struct hfi1_devdata *dd; - /* so functions that need physical port can get it easily */ - struct hfi1_pportdata *ppd; - /* associated msix interrupt */ - u32 msix_intr; + /* Bitmask of in use context(s) */ + DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); + /* per-context event flags for fileops/intr communication */ + unsigned long event_flags; /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ void *subctxt_uregbase; /* An array of pages for the eager receive buffers * N */ void *subctxt_rcvegrbuf; /* An array of pages for the eager header queue entries * N */ void *subctxt_rcvhdr_base; - /* Bitmask of in use context(s) */ - DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); - /* The version of the library which opened this ctxt */ - u32 userversion; + /* total number of polled urgent packets */ + u32 urgent; + /* saved total number of polled urgent packets for poll edge trigger */ + u32 urgent_poll; /* Type of packets or conditions we want to poll for */ u16 poll_type; - /* receive packet sequence counter */ - u8 seq_cnt; - /* ctxt rcvhdrq head offset */ - u32 head; - /* QPs waiting for context processing */ - struct list_head qp_wait_list; - /* interrupt handling */ - u64 imask; /* clear interrupt mask */ - int ireg; /* clear interrupt register */ - int numa_id; /* numa node of this context */ - /* verbs rx_stats per rcd */ - struct hfi1_opcode_stats_perctx *opstats; - - /* Is ASPM interrupt supported for this context */ - bool aspm_intr_supported; - /* ASPM state (enabled/disabled) for this context */ - bool aspm_enabled; - /* Timer for re-enabling ASPM if interrupt activity quietens down */ - struct timer_list aspm_timer; - /* Lock to serialize between intr, timer intr and user threads */ - spinlock_t aspm_lock; - /* Is ASPM processing enabled for this context (in intr context) */ - bool aspm_intr_enable; - /* Last interrupt timestamp */ - ktime_t aspm_ts_last_intr; - /* Last timestamp at which we scheduled a timer for this context */ - ktime_t aspm_ts_timer_sched; - + /* non-zero if ctxt is being shared. */ + u16 subctxt_id; + /* The version of the library which opened this ctxt */ + u32 userversion; /* - * The interrupt handler for a particular receive context can vary - * throughout it's lifetime. This is not a lock protected data member so - * it must be updated atomically and the prev and new value must always - * be valid. Worst case is we process an extra interrupt and up to 64 - * packets with the wrong interrupt handler. + * non-zero if ctxt can be shared, and defines the maximum number of + * sub-contexts for this device context. */ - int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); - - /* Indicates that this is vnic context */ - bool is_vnic; + u8 subctxt_cnt; - /* vnic queue index this context is mapped to */ - u8 vnic_q_idx; }; +/** + * rcvhdrq_size - return total size in bytes for header queue + * @rcd: the receive context + * + * rcvhdrqentsize is in DWs, so we have to convert to bytes + * + */ +static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd) +{ + return PAGE_ALIGN(rcd->rcvhdrq_cnt * + rcd->rcvhdrqentsize * sizeof(u32)); +} + /* * Represents a single packet at a high level. Put commonly computed things in * here so we do not have to keep doing them over and over. The rule of thumb is @@ -897,12 +910,11 @@ struct hfi1_pportdata { u64 vl_xmit_flit_cnt[C_VL_COUNT + 1]; }; -typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); - typedef void (*opcode_handler)(struct hfi1_packet *packet); typedef void (*hfi1_make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct rvt_swqe *wqe); +extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[]; /* return values for the RHF receive functions */ @@ -1046,8 +1058,6 @@ struct hfi1_devdata { dma_addr_t sdma_pad_phys; /* for deallocation */ size_t sdma_heads_size; - /* number from the chip */ - u32 chip_sdma_engines; /* num used */ u32 num_sdma; /* array of engines sized by num_sdma */ @@ -1102,8 +1112,6 @@ struct hfi1_devdata { /* base receive interrupt timeout, in CSR units */ u32 rcv_intr_timeout_csr; - u32 freezelen; /* max length of freezemsg */ - u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ spinlock_t uctxt_lock; /* protect rcd changes */ @@ -1130,25 +1138,6 @@ struct hfi1_devdata { /* Base GUID for device (network order) */ u64 base_guid; - /* these are the "32 bit" regs */ - - /* value we put in kr_rcvhdrsize */ - u32 rcvhdrsize; - /* number of receive contexts the chip supports */ - u32 chip_rcv_contexts; - /* number of receive array entries */ - u32 chip_rcv_array_count; - /* number of PIO send contexts the chip supports */ - u32 chip_send_contexts; - /* number of bytes in the PIO memory buffer */ - u32 chip_pio_mem_size; - /* number of bytes in the SDMA memory buffer */ - u32 chip_sdma_mem_size; - - /* size of each rcvegrbuffer */ - u32 rcvegrbufsize; - /* log2 of above */ - u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; u8 dc_shutdown; @@ -1221,9 +1210,6 @@ struct hfi1_devdata { u32 num_msix_entries; u32 first_dyn_msix_idx; - /* INTx information */ - u32 requested_intx_irq; /* did we request one? */ - /* general interrupt: mask of handled interrupts */ u64 gi_mask[CCE_NUM_INT_CSRS]; @@ -1289,8 +1275,6 @@ struct hfi1_devdata { u64 sw_cce_err_status_aggregate; /* Software counter that aggregates all bypass packet rcv errors */ u64 sw_rcv_bypass_packet_errors; - /* receive interrupt function */ - rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; /* Save the enabled LCB error bits */ u64 lcb_err_en; @@ -1329,10 +1313,7 @@ struct hfi1_devdata { /* seqlock for sc2vl */ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; u64 sc2vl[4]; - /* receive interrupt functions */ - rhf_rcv_function_ptr *rhf_rcv_function_map; u64 __percpu *rcv_limit; - u16 rhf_offset; /* offset of RHF within receive header entry */ /* adding a new field here would make it part of this cacheline */ /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ @@ -1471,7 +1452,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, /* calculate the current RHF address */ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) { - return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset; + return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset; } int hfi1_reset_device(int); @@ -2021,12 +2002,6 @@ static inline void flush_wc(void) } void handle_eflags(struct hfi1_packet *packet); -int process_receive_ib(struct hfi1_packet *packet); -int process_receive_bypass(struct hfi1_packet *packet); -int process_receive_error(struct hfi1_packet *packet); -int kdeth_process_expected(struct hfi1_packet *packet); -int kdeth_process_eager(struct hfi1_packet *packet); -int process_receive_invalid(struct hfi1_packet *packet); void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd); /* global module parameter variables */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f110842b91f5..758d273c32cf 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -364,9 +364,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, hfi1_exp_tid_group_init(rcd); rcd->ppd = ppd; rcd->dd = dd; - __set_bit(0, rcd->in_use_ctxts); rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; + rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; mutex_init(&rcd->exp_mutex); @@ -404,6 +404,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->rcvhdrq_cnt = rcvhdrcnt; rcd->rcvhdrqentsize = hfi1_hdrq_entsize; + rcd->rhf_offset = + rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32); /* * Simple Eager buffer allocation: we have already pre-allocated * the number of RcvArray entry groups. Each ctxtdata structure @@ -853,24 +855,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) struct hfi1_ctxtdata *rcd; struct hfi1_pportdata *ppd; - /* Set up recv low level handlers */ - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] = - kdeth_process_expected; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] = - kdeth_process_eager; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] = - process_receive_error; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] = - process_receive_bypass; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] = - process_receive_invalid; - dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] = - process_receive_invalid; - dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions; - /* Set up send low level handlers */ dd->process_pio_send = hfi1_verbs_send_pio; dd->process_dma_send = hfi1_verbs_send_dma; @@ -936,7 +920,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) } /* Allocate enough memory for user event notification. */ - len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * + len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS * sizeof(*dd->events)); dd->events = vmalloc_user(len); if (!dd->events) @@ -948,9 +932,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) dd->status = vmalloc_user(PAGE_SIZE); if (!dd->status) dd_dev_err(dd, "Failed to allocate dev status page\n"); - else - dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) - - sizeof(dd->status->freezemsg)); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (dd->status) @@ -1144,7 +1125,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) return; if (rcd->rcvhdrq) { - dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, + dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { @@ -1855,12 +1836,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrq) { gfp_t gfp_flags; - /* - * rcvhdrqentsize is in DWs, so we have to convert to bytes - * (* sizeof(u32)). - */ - amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * - sizeof(u32)); + amt = rcvhdrq_size(rcd); if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) gfp_flags = GFP_KERNEL; @@ -1885,8 +1861,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (!rcd->rcvhdrtail_kvaddr) goto bail_free; } - - rcd->rcvhdrq_size = amt; } /* * These values are per-context: @@ -1902,7 +1876,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); - reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK) + reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) << RCV_HDR_SIZE_HDR_SIZE_SHIFT; write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); @@ -1938,9 +1912,9 @@ bail: int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) { struct hfi1_devdata *dd = rcd->dd; - u32 max_entries, egrtop, alloced_bytes = 0, idx = 0; + u32 max_entries, egrtop, alloced_bytes = 0; gfp_t gfp_flags; - u16 order; + u16 order, idx = 0; int ret = 0; u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index baf7c324f7b8..eec83757d55f 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -157,6 +157,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) unsigned long len; resource_size_t addr; int ret = 0; + u32 rcv_array_count; addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); @@ -186,9 +187,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) goto nomem; } - dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); - dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); - dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; + rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); + dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count); + dd->base2_start = RCV_ARRAY + rcv_array_count * 8; dd->kregbase2 = ioremap_nocache( addr + dd->base2_start, @@ -214,13 +215,13 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) * to write an entire cacheline worth of entries in one shot. */ dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, - dd->chip_rcv_array_count * 8); + rcv_array_count * 8); if (!dd->rcvarray_wc) { dd_dev_err(dd, "WC mapping of receive array failed\n"); goto nomem; } dd_dev_info(dd, "WC RcvArray: %p for %x\n", - dd->rcvarray_wc, dd->chip_rcv_array_count * 8); + dd->rcvarray_wc, rcv_array_count * 8); dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; @@ -346,15 +347,13 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * Returns: * - actual number of interrupts allocated or - * - 0 if fell back to INTx. * - error */ int request_msix(struct hfi1_devdata *dd, u32 msireq) { int nvec; - nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, - PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX); if (nvec < 0) { dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); return nvec; @@ -362,10 +361,6 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) tune_pcie_caps(dd); - /* check for legacy IRQ */ - if (nvec == 1 && !dd->pcidev->msix_enabled) - return 0; - return nvec; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9cac15d10c4f..c2c1cba5b23b 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -226,7 +226,7 @@ static const char *sc_type_name(int index) int init_sc_pools_and_sizes(struct hfi1_devdata *dd) { struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; - int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1; + int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1; int total_contexts = 0; int fixed_blocks; int pool_blocks; @@ -343,8 +343,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd) sc_type_name(i), count); return -EINVAL; } - if (total_contexts + count > dd->chip_send_contexts) - count = dd->chip_send_contexts - total_contexts; + if (total_contexts + count > chip_send_contexts(dd)) + count = chip_send_contexts(dd) - total_contexts; total_contexts += count; @@ -507,7 +507,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index, if (sci->type == type && sci->allocated == 0) { sci->allocated = 1; /* use a 1:1 mapping, but make them non-equal */ - context = dd->chip_send_contexts - index - 1; + context = chip_send_contexts(dd) - index - 1; dd->hw_to_sw[context] = index; *sw_index = index; *hw_context = context; @@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc) /* Wake up the most starved one first */ if (n) hfi1_qp_wakeup(qps[max_idx], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); for (i = 0; i < n; i++) if (i != max_idx) hfi1_qp_wakeup(qps[i], - RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1697d96151bd..9b1e84a6b1cc 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MIG_STATE && attr->path_mig_state == IB_MIG_MIGRATED && qp->s_mig_state == IB_MIG_ARMED) { - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); @@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= RVT_S_AHG_CLEAR; + qp->s_flags |= HFI1_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); qp_set_16b(qp); diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index b2d4cba8d15b..078cff7560b6 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -1,7 +1,7 @@ #ifndef _QP_H #define _QP_H /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -70,6 +70,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) } /* + * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK + * + * HFI1_S_AHG_VALID - ahg header valid on chip + * HFI1_S_AHG_CLEAR - have send engine clear ahg state + * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain + * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1 + */ +#define HFI1_S_AHG_VALID 0x80000000 +#define HFI1_S_AHG_CLEAR 0x40000000 +#define HFI1_S_WAIT_PIO_DRAIN 0x20000000 +#define HFI1_S_MIN_BIT_MASK 0x01000000 + +/* + * overload wait defines + */ + +#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN) +#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) + +/* * free_ahg - clear ahg from QP */ static inline void clear_ahg(struct rvt_qp *qp) @@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; priv->s_ahg->ahgcount = 0; - qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); + qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR); if (priv->s_sde && qp->s_ahgidx >= 0) sdma_ahg_free(priv->s_sde, qp->s_ahgidx); qp->s_ahgidx = -1; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index f15c93102081..9bd63abb2dfe 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -241,7 +241,7 @@ bail: smp_wmb(); qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING - | RVT_S_AHG_VALID); + | HFI1_S_AHG_VALID); return 0; } @@ -1024,7 +1024,7 @@ done: if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) qp->s_flags |= RVT_S_WAIT_PSN; - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; } /* diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index ef4c566e206f..5f56f3c1b4c4 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp) spin_lock_irqsave(&sqp->s_lock, flags); /* Return if we are already busy processing a work request. */ - if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || + if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) || !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) goto unlock; @@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) { struct hfi1_qp_priv *priv = qp->priv; - if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR)) + if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) clear_ahg(qp); - if (!(qp->s_flags & RVT_S_AHG_VALID)) { + if (!(qp->s_flags & HFI1_S_AHG_VALID)) { /* first middle that needs copy */ if (qp->s_ahgidx < 0) qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); @@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; /* save to protect a change in another thread */ priv->s_ahg->ahgidx = qp->s_ahgidx; - qp->s_flags |= RVT_S_AHG_VALID; + qp->s_flags |= HFI1_S_AHG_VALID; } } else { /* subsequent middle after valid */ @@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; @@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, if (middle) build_ahg(qp, bth2); else - qp->s_flags &= ~RVT_S_AHG_VALID; + qp->s_flags &= ~HFI1_S_AHG_VALID; bth0 |= pkey; bth0 |= extra_bytes << 20; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 7fb350b87b49..88e326d6cc49 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1351,7 +1351,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) struct hfi1_pportdata *ppd = dd->pport + port; u32 per_sdma_credits; uint idle_cnt = sdma_idle_cnt; - size_t num_engines = dd->chip_sdma_engines; + size_t num_engines = chip_sdma_engines(dd); int ret = -ENOMEM; if (!HFI1_CAP_IS_KSET(SDMA)) { @@ -1360,18 +1360,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } if (mod_num_sdma && /* can't exceed chip support */ - mod_num_sdma <= dd->chip_sdma_engines && + mod_num_sdma <= chip_sdma_engines(dd) && /* count must be >= vls */ mod_num_sdma >= num_vls) num_engines = mod_num_sdma; dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma); - dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines); + dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd)); dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n", - dd->chip_sdma_mem_size); + chip_sdma_mem_size(dd)); per_sdma_credits = - dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE); + chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE); /* set up freeze waitqueue */ init_waitqueue_head(&dd->sdma_unfreeze_wq); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 08991874c0e2..13374c727b14 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp, int was_empty; dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); - dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); + dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, @@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) return pio_wait(qp, ps->s_txreq->psc, ps, - RVT_S_WAIT_PIO_DRAIN); + HFI1_S_WAIT_PIO_DRAIN); return sr(qp, ps, 0); } @@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_qp = hfi1_max_qps; rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; - rdi->dparms.props.max_sge = hfi1_max_sges; + rdi->dparms.props.max_send_sge = hfi1_max_sges; + rdi->dparms.props.max_recv_sge = hfi1_max_sges; rdi->dparms.props.max_sge_rd = hfi1_max_sges; rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; @@ -1497,15 +1498,6 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - /* - * sm_lid of 0xFFFF needs special handling so that it can - * be differentiated from a permissve LID of 0xFFFF. - * We set the grh_required flag here so the SA can program - * the DGID in the address handle appropriately - */ - if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)) - props->grh_required = true; - return 0; } @@ -1892,7 +1884,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->process_mad = hfi1_process_mad; ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; - strncpy(ibdev->node_desc, init_utsname()->nodename, + strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 616fc9b6fad8..c643d80c5a53 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -120,8 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, uctxt->seq_cnt = 1; uctxt->is_vnic = true; - if (dd->num_msix_entries) - hfi1_set_vnic_msix_info(uctxt); + hfi1_set_vnic_msix_info(uctxt); hfi1_stats.sps_ctxts++; dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); @@ -136,8 +135,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); flush_wc(); - if (dd->num_msix_entries) - hfi1_reset_vnic_msix_info(uctxt); + hfi1_reset_vnic_msix_info(uctxt); /* * Disable receive context and interrupt available, reset all @@ -818,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->chip_sdma_engines, dd->num_vnic_contexts); + chip_sdma_engines(dd), dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = dd->chip_sdma_engines; + vinfo->num_tx_q = chip_sdma_engines(dd); vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index d74928621559..0d96c5bb38cd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; - struct ib_gid_attr gid_attr; struct hns_roce_ah *ah; u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; - int ret; ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) @@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - /* Get source gid */ - ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) { - dev_err(dev, "get sgid failed! ret = %d\n", ret); - kfree(ah); - return ERR_PTR(ret); - } - - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - dev_put(gid_attr.ndev); + gid_attr = ah_attr->grh.sgid_attr; + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & @@ -108,7 +97,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate); rdma_ah_set_grh(ah_attr, NULL, (le32_to_cpu(ah->av.sl_tclass_flowlabel) & - HNS_ROCE_FLOW_LABLE_MASK), ah->av.gid_index, + HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index, ah->av.hop_limit, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >> HNS_ROCE_TCLASS_SHIFT)); diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 319cb74aebaf..93d4b4ec002d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -382,15 +382,6 @@ #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C -#define ROCEE_VF_SMAC_CFG0_REG 0x12000 -#define ROCEE_VF_SMAC_CFG1_REG 0x12004 - -#define ROCEE_VF_SGID_CFG0_REG 0x10000 -#define ROCEE_VF_SGID_CFG1_REG 0x10004 -#define ROCEE_VF_SGID_CFG2_REG 0x10008 -#define ROCEE_VF_SGID_CFG3_REG 0x1000c -#define ROCEE_VF_SGID_CFG4_REG 0x10010 - #define ROCEE_VF_ABN_INT_CFG_REG 0x13000 #define ROCEE_VF_ABN_INT_ST_REG 0x13004 #define ROCEE_VF_ABN_INT_EN_REG 0x13008 diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index ebee2782a573..e2f93c1ce86a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -41,6 +41,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, found: db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); + page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK; + db->virt_addr = sg_virt(page->umem->sg_head.sgl); db->u.user_page = page; refcount_inc(&page->refcount); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 31221d506d9a..9a24fd0ee3e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -76,7 +76,7 @@ /* 4G/4K = 1M */ #define HNS_ROCE_SL_SHIFT 28 #define HNS_ROCE_TCLASS_SHIFT 20 -#define HNS_ROCE_FLOW_LABLE_MASK 0xfffff +#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_MAX_GID_NUM 16 @@ -110,6 +110,7 @@ enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, }; enum { @@ -190,7 +191,8 @@ enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), - HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3) + HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), }; enum hns_roce_mtt_type { @@ -385,6 +387,7 @@ struct hns_roce_db { struct hns_roce_user_db_page *user_page; } u; dma_addr_t dma; + void *virt_addr; int index; int order; }; @@ -524,7 +527,9 @@ struct hns_roce_qp { struct hns_roce_buf hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; + struct hns_roce_db sdb; u8 rdb_en; + u8 sdb_en; u32 doorbell_qpn; __le32 sq_signal_bits; u32 sq_next_wqe; @@ -579,22 +584,22 @@ struct hns_roce_ceqe { }; struct hns_roce_aeqe { - u32 asyn; + __le32 asyn; union { struct { - u32 qp; + __le32 qp; u32 rsv0; u32 rsv1; } qp_event; struct { - u32 cq; + __le32 cq; u32 rsv0; u32 rsv1; } cq_event; struct { - u32 ceqe; + __le32 ceqe; u32 rsv0; u32 rsv1; } ce_event; @@ -641,6 +646,8 @@ struct hns_roce_eq { int shift; dma_addr_t cur_eqe_ba; dma_addr_t nxt_eqe_ba; + int event_type; + int sub_type; }; struct hns_roce_eq_table { @@ -720,10 +727,21 @@ struct hns_roce_caps { u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; + u32 sl_num; + u32 tsq_buf_pg_sz; + u32 tpq_buf_pg_sz; u32 chunk_sz; /* chunk size in non multihop mode*/ u64 flags; }; +struct hns_roce_work { + struct hns_roce_dev *hr_dev; + struct work_struct work; + u32 qpn; + int event_type; + int sub_type; +}; + struct hns_roce_hw { int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev *hr_dev); @@ -736,7 +754,7 @@ struct hns_roce_hw { u16 token, int event); int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, - union ib_gid *gid, const struct ib_gid_attr *attr); + const union ib_gid *gid, const struct ib_gid_attr *attr); int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); @@ -760,10 +778,10 @@ struct hns_roce_hw { int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); int (*destroy_qp)(struct ib_qp *ibqp); - int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); - int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); + int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); + int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); @@ -816,6 +834,7 @@ struct hns_roce_dev { u32 tptr_size; /*only for hw v1*/ const struct hns_roce_hw *hw; void *priv; + struct workqueue_struct *irq_workq; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -864,7 +883,7 @@ static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) return container_of(hr_qp, struct hns_roce_sqp, hr_qp); } -static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest) +static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); } @@ -982,7 +1001,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, int cnt); -__be32 send_ieth(struct ib_send_wr *wr); +__be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 63b5b3edabcb..f6faefed96e8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -170,7 +170,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, case 3: mhop->l2_idx = table_idx & (chunk_ba_num - 1); mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1); - mhop->l0_idx = table_idx / chunk_ba_num / chunk_ba_num; + mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num; break; case 2: mhop->l1_idx = table_idx & (chunk_ba_num - 1); @@ -342,7 +342,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, } else { break; } - msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); } bt_cmd_l = (u32)bt_ba; @@ -494,6 +494,9 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, step_idx = 1; } else if (hop_num == HNS_ROCE_HOP_NUM_0) { step_idx = 0; + } else { + ret = -EINVAL; + goto err_dma_alloc_l1; } /* set HEM base address to hardware */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 8444234ed092..081aa91fc162 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -58,8 +58,9 @@ static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr, rseg->len = 0; } -static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int hns_roce_v1_post_send(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); @@ -173,12 +174,14 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_FLOW_LABEL_M, - UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); + UD_SEND_WQE_U32_36_FLOW_LABEL_S, + ah->av.sl_tclass_flowlabel & + HNS_ROCE_FLOW_LABEL_MASK); roce_set_field(ud_sq_wqe->u32_36, - UD_SEND_WQE_U32_36_PRIORITY_M, - UD_SEND_WQE_U32_36_PRIORITY_S, - ah->av.sl_tclass_flowlabel >> - HNS_ROCE_SL_SHIFT); + UD_SEND_WQE_U32_36_PRIORITY_M, + UD_SEND_WQE_U32_36_PRIORITY_S, + le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_SL_SHIFT); roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_SGID_INDEX_M, UD_SEND_WQE_U32_36_SGID_INDEX_S, @@ -191,7 +194,9 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ah->av.hop_limit); roce_set_field(ud_sq_wqe->u32_40, UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, - UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0); + UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_TCLASS_SHIFT); memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); @@ -333,7 +338,7 @@ out: doorbell[0] = le32_to_cpu(sq_db.u32_4); doorbell[1] = le32_to_cpu(sq_db.u32_8); - hns_roce_write64_k(doorbell, qp->sq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l); qp->sq_next_wqe = ind; } @@ -342,14 +347,15 @@ out: return ret; } -static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int hns_roce_v1_post_recv(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int ret = 0; int nreq = 0; int ind = 0; int i = 0; - u32 reg_val = 0; + u32 reg_val; unsigned long flags = 0; struct hns_roce_rq_wqe_ctrl *ctrl = NULL; struct hns_roce_wqe_data_seg *scat = NULL; @@ -402,14 +408,18 @@ out: wmb(); if (ibqp->qp_type == IB_QPT_GSI) { + __le32 tmp; + /* SW update GSI rq header */ reg_val = roce_read(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port); - roce_set_field(reg_val, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, hr_qp->rq.head); + reg_val = le32_to_cpu(tmp); roce_write(to_hr_dev(ibqp->device), ROCEE_QP1C_CFG3_0_REG + QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); @@ -430,7 +440,8 @@ out: doorbell[0] = le32_to_cpu(rq_db.u32_4); doorbell[1] = le32_to_cpu(rq_db.u32_8); - hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + hns_roce_write64_k((__le32 *)doorbell, + hr_qp->rq.db_reg_l); } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -441,51 +452,63 @@ out: static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, int sdb_mode, int odb_mode) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); - roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); + roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, u32 odb_mode) { + __le32 tmp; u32 val; /* Configure SDB/ODB extend mode */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); - roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); + roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, u32 sdb_alful) { + __le32 tmp; u32 val; /* Configure SDB */ val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); - roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); } static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, u32 odb_alful) { + __le32 tmp; u32 val; /* Configure ODB */ val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); - roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, + roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); } @@ -496,6 +519,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t sdb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -511,7 +535,8 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, /* Configure extend SDB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, db->ext_db->esdb_dep); /* @@ -519,8 +544,9 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); @@ -535,6 +561,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, struct hns_roce_v1_priv *priv; struct hns_roce_db_table *db; dma_addr_t odb_dma_addr; + __le32 tmp; u32 val; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -550,12 +577,14 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, /* Configure extend ODB depth */ val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); - roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, db->ext_db->eodb_dep); - roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, db->ext_db->eodb_dep); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); @@ -762,6 +791,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); if (!free_mr->mr_free_qp[i]) { dev_err(dev, "Create loop qp failed!\n"); + ret = -ENOMEM; goto create_lp_qp_failed; } hr_qp = free_mr->mr_free_qp[i]; @@ -831,7 +861,7 @@ alloc_pd_failed: if (hns_roce_ib_destroy_cq(cq)) dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); - return -EINVAL; + return ret; } static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) @@ -969,7 +999,8 @@ static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp) { struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); struct device *dev = &hr_dev->pdev->dev; - struct ib_send_wr send_wr, *bad_wr; + struct ib_send_wr send_wr; + const struct ib_send_wr *bad_wr; int ret; memset(&send_wr, 0, sizeof(send_wr)); @@ -1161,9 +1192,10 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev) static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) { int ret; + u32 val; + __le32 tmp; int raq_shift = 0; dma_addr_t addr; - u32 val; struct hns_roce_v1_priv *priv; struct hns_roce_raq_table *raq; struct device *dev = &hr_dev->pdev->dev; @@ -1189,46 +1221,54 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) /* Configure raq_shift */ raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY); val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); /* * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, + roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, raq->e_raq_buf->map >> 44); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); /* Configure raq threshold */ val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); - roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, HNS_ROCE_V1_EXT_RAQ_WF); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); /* Enable extend raq */ val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); - roce_set_field(val, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, POL_TIME_INTERVAL_VAL); - roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); - roce_set_field(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); + roce_set_field(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, 2); - roce_set_bit(val, + roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); /* Enable raq drop */ val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + tmp = cpu_to_le32(val); + roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); @@ -1255,20 +1295,25 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) { + __le32 tmp; u32 val; if (enable_flag) { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Open all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, ALL_PORT_VAL_OPEN); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } else { val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); /* Close all ports */ - roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } } @@ -1498,13 +1543,11 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; - hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); - hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, - ROCEE_VENDOR_PART_ID_REG)); - hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_L_REG)) | - ((u64)le32_to_cpu(roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); + hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); + hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) | + ((u64)roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_H_REG) << 32); hr_dev->hw_rev = HNS_ROCE_HW_VER1; caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; @@ -1557,8 +1600,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM; - caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, - ROCEE_ACK_DELAY_REG)); + caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG); caps->max_mtu = IB_MTU_2048; return 0; @@ -1568,21 +1610,25 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) { int ret; u32 val; + __le32 tmp; struct device *dev = &hr_dev->pdev->dev; /* DMAE user config */ val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, 1 << PAGES_SHIFT_16); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); - roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, + roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, 1 << PAGES_SHIFT_16); @@ -1668,6 +1714,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG); unsigned long end; u32 val = 0; + __le32 tmp; end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v1_cmd_pending(hr_dev)) { @@ -1679,15 +1726,17 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, op); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); - roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); - roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event); + roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); + roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M, ROCEE_MB6_ROCEE_MB_TOKEN_S, token); + val = le32_to_cpu(tmp); writeq(in_param, hcr + 0); writeq(out_param, hcr + 2); writel(in_modifier, hcr + 4); @@ -1717,7 +1766,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, return -ETIMEDOUT; } - status = le32_to_cpu((__force __be32) + status = le32_to_cpu((__force __le32) __raw_readl(hcr + HCR_STATUS_OFFSET)); if ((status & STATUS_MASK) != 0x1) { dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status); @@ -1728,7 +1777,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, } static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { u32 *p = NULL; @@ -1760,6 +1809,7 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, { u32 reg_smac_l; u16 reg_smac_h; + __le32 tmp; u16 *p_h; u32 *p; u32 val; @@ -1784,10 +1834,12 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + tmp = cpu_to_le32(val); p_h = (u16 *)(&addr[4]); reg_smac_h = *p_h; - roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M, + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M, ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); @@ -1797,12 +1849,15 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu) { + __le32 tmp; u32 val; val = roce_read(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); - roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, + tmp = cpu_to_le32(val); + roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); + val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, val); } @@ -1848,9 +1903,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, MPT_BYTE_12_MW_BIND_COUNTER_S, 0); - mpt_entry->virt_addr_l = (u32)mr->iova; - mpt_entry->virt_addr_h = (u32)(mr->iova >> 32); - mpt_entry->length = (u32)mr->size; + mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova); + mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32)); + mpt_entry->length = cpu_to_le32((u32)mr->size); roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, MPT_BYTE_28_PD_S, mr->pd); @@ -1885,64 +1940,59 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA0_H_M, MPT_BYTE_36_PA0_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 1: roce_set_field(mpt_entry->mpt_byte_36, MPT_BYTE_36_PA1_L_M, - MPT_BYTE_36_PA1_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_36_PA1_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA1_H_M, MPT_BYTE_40_PA1_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 2: roce_set_field(mpt_entry->mpt_byte_40, MPT_BYTE_40_PA2_L_M, - MPT_BYTE_40_PA2_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_40_PA2_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA2_H_M, MPT_BYTE_44_PA2_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; case 3: roce_set_field(mpt_entry->mpt_byte_44, MPT_BYTE_44_PA3_L_M, - MPT_BYTE_44_PA3_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_44_PA3_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_48, MPT_BYTE_48_PA3_H_M, MPT_BYTE_48_PA3_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8))); + (u32)(pages[i] >> PAGES_SHIFT_8)); break; case 4: mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA4_H_M, MPT_BYTE_56_PA4_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + (u32)(pages[i] >> PAGES_SHIFT_32)); break; case 5: roce_set_field(mpt_entry->mpt_byte_56, MPT_BYTE_56_PA5_L_M, - MPT_BYTE_56_PA5_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_56_PA5_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA5_H_M, MPT_BYTE_60_PA5_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + (u32)(pages[i] >> PAGES_SHIFT_24)); break; case 6: roce_set_field(mpt_entry->mpt_byte_60, MPT_BYTE_60_PA6_L_M, - MPT_BYTE_60_PA6_L_S, - cpu_to_le32((u32)(pages[i]))); + MPT_BYTE_60_PA6_L_S, (u32)(pages[i])); roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_PA6_H_M, MPT_BYTE_64_PA6_H_S, - cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + (u32)(pages[i] >> PAGES_SHIFT_16)); break; default: break; @@ -1951,7 +2001,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, free_page((unsigned long) pages); - mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr); + mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr)); roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, MPT_BYTE_12_PBL_ADDR_H_S, @@ -1982,9 +2032,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { - u32 doorbell[2]; + __le32 doorbell[2]; - doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1)); doorbell[1] = 0; roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, @@ -2081,10 +2131,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); - cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4); - cq_context->cq_bt_l = (u32)dma_handle; - cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l); + cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, @@ -2096,15 +2144,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, ilog2((unsigned int)nent)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); - cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12); - cq_context->cur_cqe_ba0_l = (u32)(mtts[0]); - cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l); + cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, - CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, - cpu_to_le32((mtts[0]) >> 32)); + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32); /* Dedicated hardware, directly set 0 */ roce_set_field(cq_context->cqc_byte_20, CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, @@ -2118,9 +2163,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, tptr_dma_addr >> 44); - cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); - cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12); + cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12)); roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, @@ -2138,7 +2182,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_32, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); - cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); } static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) @@ -2151,7 +2194,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, { struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; - u32 doorbell[2]; + __le32 doorbell[2]; notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; @@ -2159,7 +2202,8 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, * flags = 0; Notification Flag = 1, next * flags = 1; Notification Flag = 0, solocited */ - doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); + doorbell[0] = + cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); @@ -2416,7 +2460,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; unsigned long end = 0, flags = 0; - uint32_t bt_cmd_val[2] = {0}; + __le32 bt_cmd_val[2] = {0}; void __iomem *bt_cmd; u64 bt_ba = 0; @@ -2468,7 +2512,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, msleep(HW_SYNC_SLEEP_TIME_INTERVAL); } - bt_cmd_val[0] = (uint32_t)bt_ba; + bt_cmd_val[0] = (__le32)bt_ba; roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); @@ -2569,10 +2613,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_sqp_context *context; struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; + u32 __iomem *addr; int rq_pa_start; + __le32 tmp; u32 reg_val; u64 *mtts; - u32 __iomem *addr; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) @@ -2598,7 +2643,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, @@ -2610,7 +2655,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 1); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, @@ -2624,7 +2669,8 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, @@ -2643,7 +2689,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn); - context->cur_sq_wqe_ba_l = (u32)mtts[0]; + context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, @@ -2658,23 +2704,25 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - writel(context->qp1c_bytes_4, addr); - writel(context->sq_rq_bt_l, addr + 1); - writel(context->qp1c_bytes_12, addr + 2); - writel(context->qp1c_bytes_16, addr + 3); - writel(context->qp1c_bytes_20, addr + 4); - writel(context->cur_rq_wqe_ba_l, addr + 5); - writel(context->qp1c_bytes_28, addr + 6); - writel(context->qp1c_bytes_32, addr + 7); - writel(context->cur_sq_wqe_ba_l, addr + 8); - writel(context->qp1c_bytes_40, addr + 9); + writel(le32_to_cpu(context->qp1c_bytes_4), addr); + writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1); + writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2); + writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3); + writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4); + writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5); + writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6); + writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7); + writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8); + writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9); } /* Modify QP1C status */ reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context)); - roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, + tmp = cpu_to_le32(reg_val); + roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); + reg_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + hr_qp->phy_port * sizeof(*context), reg_val); @@ -2712,7 +2760,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; - uint32_t doorbell[2] = {0}; + __le32 doorbell[2] = {0}; int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; @@ -2887,7 +2935,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dmac = (u8 *)attr->ah_attr.roce.dmac; - context->sq_rq_bt_l = (u32)(dma_handle); + context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, @@ -2899,7 +2947,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, attr->min_rnr_timer); - context->irrl_ba_l = (u32)(dma_handle_2); + context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2)); roce_set_field(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, @@ -2913,7 +2961,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); + le32_to_cpu(hr_qp->sq_signal_bits)); port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; @@ -2991,7 +3039,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + context->cur_rq_wqe_ba_l = + cpu_to_le32((u32)(mtts[rq_pa_start])); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, @@ -3071,7 +3120,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; } - context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, @@ -3219,7 +3268,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, @@ -3386,16 +3435,16 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, addr = ROCEE_QP1C_CFG0_0_REG + hr_qp->port * sizeof(struct hns_roce_sqp_context); - context.qp1c_bytes_4 = roce_read(hr_dev, addr); - context.sq_rq_bt_l = roce_read(hr_dev, addr + 1); - context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2); - context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3); - context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4); - context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5); - context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6); - context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7); - context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8); - context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9); + context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr)); + context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1)); + context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2)); + context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3)); + context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4)); + context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5)); + context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6)); + context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7)); + context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8)); + context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9)); hr_qp->state = roce_get_field(context.qp1c_bytes_4, QP1C_BYTES_4_QP_STATE_M, @@ -3557,7 +3606,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); - qp_attr->rnr_retry = context->rnr_retry; + qp_attr->rnr_retry = (u8)context->rnr_retry; done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -3595,42 +3644,47 @@ static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, u32 *old_send, u32 *old_retry, u32 *tsp_st, u32 *success_flags) { + __le32 *old_send_tmp, *old_retry_tmp; u32 sdb_retry_cnt; u32 sdb_send_ptr; u32 cur_cnt, old_cnt; + __le32 tmp, tmp1; u32 send_ptr; sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - cur_cnt = roce_get_field(sdb_send_ptr, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + tmp = cpu_to_le32(sdb_send_ptr); + tmp1 = cpu_to_le32(sdb_retry_cnt); + cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); + + old_send_tmp = (__le32 *)old_send; + old_retry_tmp = (__le32 *)old_retry; if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(*old_retry, + roce_get_field(*old_retry_tmp, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) *success_flags = 1; } else { - old_cnt = roce_get_field(*old_send, + old_cnt = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { *success_flags = 1; } else { - send_ptr = roce_get_field(*old_send, + send_ptr = roce_get_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(sdb_retry_cnt, + roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(*old_send, + roce_set_field(*old_send_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, send_ptr); @@ -3646,11 +3700,14 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, { struct device *dev = &hr_dev->pdev->dev; u32 sdb_send_ptr, old_send; + __le32 sdb_issue_ptr_tmp; + __le32 sdb_send_ptr_tmp; u32 success_flags = 0; unsigned long end; u32 old_retry; u32 inv_cnt; u32 tsp_st; + __le32 tmp; if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || *wait_stage < HNS_ROCE_V1_DB_STAGE1) { @@ -3679,10 +3736,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, ROCEE_SDB_SEND_PTR_REG); } - if (roce_get_field(sdb_issue_ptr, + sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr); + sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr); + if (roce_get_field(sdb_issue_ptr_tmp, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == - roce_get_field(sdb_send_ptr, + roce_get_field(sdb_send_ptr_tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); @@ -3690,7 +3749,8 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, do { tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); - if (roce_get_bit(tsp_st, + tmp = cpu_to_le32(tsp_st); + if (roce_get_bit(tmp, ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; return 0; @@ -3699,8 +3759,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, if (!time_before(jiffies, end)) { dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" "issue 0x%x send 0x%x.\n", - hr_qp->qpn, sdb_issue_ptr, - sdb_send_ptr); + hr_qp->qpn, + le32_to_cpu(sdb_issue_ptr_tmp), + le32_to_cpu(sdb_send_ptr_tmp)); return 0; } @@ -4102,9 +4163,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, struct device *dev = &hr_dev->pdev->dev; u32 cqn; - cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -4340,6 +4401,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) u32 aeshift_val; u32 ceshift_val; u32 cemask_val; + __le32 tmp; int i; /* @@ -4348,30 +4410,34 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) * interrupt, mask irq, clear irq, cancel mask operation */ aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); + tmp = cpu_to_le32(aeshift_val); /* AEQE overflow */ - if (roce_get_bit(aeshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "AEQ overflow!\n"); /* Set mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); /* Clear int state(INT_WC : write 1 clear) */ caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); - roce_set_bit(caepaest_val, - ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + tmp = cpu_to_le32(caepaest_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + caepaest_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); /* Clear mask */ caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(caepaemask_val, - ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(caepaemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + caepaemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); } @@ -4379,8 +4445,9 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + i * CEQ_REG_OFFSET); + tmp = cpu_to_le32(ceshift_val); - if (roce_get_bit(ceshift_val, + if (roce_get_bit(tmp, ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { dev_warn(dev, "CEQ[%d] almost overflow!\n", i); int_work++; @@ -4389,9 +4456,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_ENABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); @@ -4399,9 +4468,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cealmovf_val = roce_read(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cealmovf_val, + tmp = cpu_to_le32(cealmovf_val); + roce_set_bit(tmp, ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, 1); + cealmovf_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + i * CEQ_REG_OFFSET, cealmovf_val); @@ -4409,9 +4480,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) cemask_val = roce_read(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET); - roce_set_bit(cemask_val, + tmp = cpu_to_le32(cemask_val); + roce_set_bit(tmp, ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, HNS_ROCE_INT_MASK_DISABLE); + cemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + i * CEQ_REG_OFFSET, cemask_val); } @@ -4435,13 +4508,16 @@ static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev) { u32 aemask_val; int masken = 0; + __le32 tmp; int i; /* AEQ INT */ aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + tmp = cpu_to_le32(aemask_val); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, masken); - roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + aemask_val = le32_to_cpu(tmp); roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); /* CEQ INT */ @@ -4473,20 +4549,24 @@ static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num, int enable_flag) { void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; + __le32 tmp; u32 val; val = readl(eqc); + tmp = cpu_to_le32(val); if (enable_flag) - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_VALID); else - roce_set_field(val, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); + + val = le32_to_cpu(tmp); writel(val, eqc); } @@ -4499,6 +4579,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, u32 eqconsindx_val = 0; u32 eqcuridx_val = 0; u32 eqshift_val = 0; + __le32 tmp2 = 0; + __le32 tmp1 = 0; + __le32 tmp = 0; int num_bas; int ret; int i; @@ -4530,14 +4613,13 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); } eq->cons_index = 0; - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, HNS_ROCE_EQ_STAT_INVALID); - roce_set_field(eqshift_val, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, + roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, eq->log_entries); + eqshift_val = le32_to_cpu(tmp); writel(eqshift_val, eqc); /* Configure eq extended address 12~44bit */ @@ -4549,18 +4631,18 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, * using 4K page, and shift more 32 because of * caculating the high 32 bit value evaluated to hardware. */ - roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, eq->buf_list[0].map >> 44); - roce_set_field(eqcuridx_val, - ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, + roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); + eqcuridx_val = le32_to_cpu(tmp1); writel(eqcuridx_val, eqc + 8); /* Configure eq consumer index */ - roce_set_field(eqconsindx_val, - ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, + roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); + eqconsindx_val = le32_to_cpu(tmp2); writel(eqconsindx_val, eqc + 0xc); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index e9a2717ea7cd..66440147d9eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -260,7 +260,7 @@ struct hns_roce_cqe { __le32 cqe_byte_4; union { __le32 r_key; - __be32 immediate_data; + __le32 immediate_data; }; __le32 byte_cnt; __le32 cqe_byte_16; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a6e11be0ea0f..0218c0f8c2a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -36,6 +36,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <net/addrconf.h> +#include <rdma/ib_addr.h> #include <rdma/ib_umem.h> #include "hnae3.h" @@ -53,7 +54,7 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, dseg->len = cpu_to_le32(sg->length); } -static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, +static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind) { struct hns_roce_v2_wqe_data_seg *dseg; @@ -100,10 +101,10 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, } } -static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, +static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, - struct ib_send_wr **bad_wr) + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; @@ -164,23 +165,30 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, return 0; } -static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state); + +static int hns_roce_v2_post_send(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); - struct hns_roce_v2_wqe_data_seg *dseg; struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; + struct ib_qp_attr attr; unsigned int sge_ind = 0; unsigned int owner_bit; unsigned long flags; unsigned int ind; void *wqe = NULL; bool loopback; + int attr_mask; u32 tmp_len; int ret = 0; u8 *smac; @@ -273,7 +281,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: - ud_sq_wqe->immtdata = wr->ex.imm_data; + ud_sq_wqe->immtdata = + cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); break; default: ud_sq_wqe->immtdata = 0; @@ -330,14 +339,13 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - 0); - roce_set_field(ud_sq_wqe->byte_36, - V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - 0); + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_TCLASS_SHIFT); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, 0); + V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, + ah->av.sl_tclass_flowlabel & + HNS_ROCE_FLOW_LABEL_MASK); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, V2_UD_SEND_WQE_BYTE_40_SL_S, @@ -371,7 +379,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->immtdata = wr->ex.imm_data; + rc_sq_wqe->immtdata = + cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); break; case IB_WR_SEND_WITH_INV: rc_sq_wqe->inv_key = @@ -485,7 +494,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } wqe += sizeof(struct hns_roce_v2_rc_send_wqe); - dseg = wqe; ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe, &sge_ind, bad_wr); @@ -523,6 +531,19 @@ out: qp->sq_next_wqe = ind; qp->next_sge = sge_ind; + + if (qp->state == IB_QPS_ERR) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + + ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask, + qp->state, IB_QPS_ERR); + if (ret) { + spin_unlock_irqrestore(&qp->sq.lock, flags); + *bad_wr = wr; + return ret; + } + } } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -530,16 +551,19 @@ out: return ret; } -static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int hns_roce_v2_post_recv(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_rinl_sge *sge_list; struct device *dev = hr_dev->dev; + struct ib_qp_attr attr; unsigned long flags; void *wqe = NULL; + int attr_mask; int ret = 0; int nreq; int ind; @@ -608,6 +632,20 @@ out: wmb(); *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; + + if (hr_qp->state == IB_QPS_ERR) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, + attr_mask, hr_qp->state, + IB_QPS_ERR); + if (ret) { + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + *bad_wr = wr; + return ret; + } + } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -702,8 +740,8 @@ static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev) int ret; /* Setup the queue entries for command queue */ - priv->cmq.csq.desc_num = 1024; - priv->cmq.crq.desc_num = 1024; + priv->cmq.csq.desc_num = CMD_CSQ_DESC_NUM; + priv->cmq.crq.desc_num = CMD_CRQ_DESC_NUM; /* Setup the lock for command queue */ spin_lock_init(&priv->cmq.csq.lock); @@ -925,7 +963,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; - struct hns_roce_pf_res *res; + struct hns_roce_pf_res_a *req_a; + struct hns_roce_pf_res_b *req_b; int ret; int i; @@ -943,21 +982,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) if (ret) return ret; - res = (struct hns_roce_pf_res *)desc[0].data; + req_a = (struct hns_roce_pf_res_a *)desc[0].data; + req_b = (struct hns_roce_pf_res_b *)desc[1].data; - hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num, + hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num, PF_RES_DATA_1_PF_QPC_BT_NUM_M, PF_RES_DATA_1_PF_QPC_BT_NUM_S); - hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num, + hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num, PF_RES_DATA_2_PF_SRQC_BT_NUM_M, PF_RES_DATA_2_PF_SRQC_BT_NUM_S); - hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num, + hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num, PF_RES_DATA_3_PF_CQC_BT_NUM_M, PF_RES_DATA_3_PF_CQC_BT_NUM_S); - hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num, + hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num, PF_RES_DATA_4_PF_MPT_BT_NUM_M, PF_RES_DATA_4_PF_MPT_BT_NUM_S); + hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num, + PF_RES_DATA_3_PF_SL_NUM_M, + PF_RES_DATA_3_PF_SL_NUM_S); + return 0; } @@ -1203,12 +1247,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; + caps->tsq_buf_pg_sz = 0; caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | HNS_ROCE_CAP_FLAG_RQ_INLINE | - HNS_ROCE_CAP_FLAG_RECORD_DB; + HNS_ROCE_CAP_FLAG_RECORD_DB | + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; caps->pkey_table_len[0] = 1; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; @@ -1224,6 +1270,228 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } +static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type type) +{ + struct hns_roce_cmq_desc desc[2]; + struct hns_roce_cfg_llm_a *req_a = + (struct hns_roce_cfg_llm_a *)desc[0].data; + struct hns_roce_cfg_llm_b *req_b = + (struct hns_roce_cfg_llm_b *)desc[1].data; + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + enum hns_roce_opcode_type opcode; + u32 page_num; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + opcode = HNS_ROCE_OPC_CFG_EXT_LLM; + break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM; + break; + default: + return -EINVAL; + } + + page_num = link_tbl->npages; + entry = link_tbl->table.buf; + memset(req_a, 0, sizeof(*req_a)); + memset(req_b, 0, sizeof(*req_b)); + + for (i = 0; i < 2; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + + if (i == 0) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + else + desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + if (i == 0) { + req_a->base_addr_l = link_tbl->table.map & 0xffffffff; + req_a->base_addr_h = (link_tbl->table.map >> 32) & + 0xffffffff; + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_DEPTH_M, + CFG_LLM_QUE_DEPTH_S, + link_tbl->npages); + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_QUE_PGSZ_M, + CFG_LLM_QUE_PGSZ_S, + link_tbl->pg_sz); + req_a->head_ba_l = entry[0].blk_ba0; + req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr; + roce_set_field(req_a->head_ptr, + CFG_LLM_HEAD_PTR_M, + CFG_LLM_HEAD_PTR_S, 0); + } else { + req_b->tail_ba_l = entry[page_num - 1].blk_ba0; + roce_set_field(req_b->tail_ba_h, + CFG_LLM_TAIL_BA_H_M, + CFG_LLM_TAIL_BA_H_S, + entry[page_num - 1].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_BA1_M); + roce_set_field(req_b->tail_ptr, + CFG_LLM_TAIL_PTR_M, + CFG_LLM_TAIL_PTR_S, + (entry[page_num - 2].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> + HNS_ROCE_LINK_TABLE_NXT_PTR_S); + } + } + roce_set_field(req_a->depth_pgsz_init_en, + CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1); + + return hns_roce_cmq_send(hr_dev, desc, 2); +} + +static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, + enum hns_roce_link_table_type type) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_link_table *link_tbl; + struct hns_roce_link_table_entry *entry; + struct device *dev = hr_dev->dev; + u32 buf_chk_sz; + dma_addr_t t; + int func_num = 1; + int pg_num_a; + int pg_num_b; + int pg_num; + int size; + int i; + + switch (type) { + case TSQ_LINK_TABLE: + link_tbl = &priv->tsq; + buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz; + pg_num_b = hr_dev->caps.sl_num * 4 + 2; + break; + case TPQ_LINK_TABLE: + link_tbl = &priv->tpq; + buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT); + pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz; + pg_num_b = 2 * 4 * func_num + 2; + break; + default: + return -EINVAL; + } + + pg_num = max(pg_num_a, pg_num_b); + size = pg_num * sizeof(struct hns_roce_link_table_entry); + + link_tbl->table.buf = dma_alloc_coherent(dev, size, + &link_tbl->table.map, + GFP_KERNEL); + if (!link_tbl->table.buf) + goto out; + + link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list), + GFP_KERNEL); + if (!link_tbl->pg_list) + goto err_kcalloc_failed; + + entry = link_tbl->table.buf; + for (i = 0; i < pg_num; ++i) { + link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz, + &t, GFP_KERNEL); + if (!link_tbl->pg_list[i].buf) + goto err_alloc_buf_failed; + + link_tbl->pg_list[i].map = t; + memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz); + + entry[i].blk_ba0 = (t >> 12) & 0xffffffff; + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_BA1_M, + HNS_ROCE_LINK_TABLE_BA1_S, + t >> 44); + + if (i < (pg_num - 1)) + roce_set_field(entry[i].blk_ba1_nxt_ptr, + HNS_ROCE_LINK_TABLE_NXT_PTR_M, + HNS_ROCE_LINK_TABLE_NXT_PTR_S, + i + 1); + } + link_tbl->npages = pg_num; + link_tbl->pg_sz = buf_chk_sz; + + return hns_roce_config_link_table(hr_dev, type); + +err_alloc_buf_failed: + for (i -= 1; i >= 0; i--) + dma_free_coherent(dev, buf_chk_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + +err_kcalloc_failed: + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); + +out: + return -ENOMEM; +} + +static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev, + struct hns_roce_link_table *link_tbl) +{ + struct device *dev = hr_dev->dev; + int size; + int i; + + size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry); + + for (i = 0; i < link_tbl->npages; ++i) + if (link_tbl->pg_list[i].buf) + dma_free_coherent(dev, link_tbl->pg_sz, + link_tbl->pg_list[i].buf, + link_tbl->pg_list[i].map); + kfree(link_tbl->pg_list); + + dma_free_coherent(dev, size, link_tbl->table.buf, + link_tbl->table.map); +} + +static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + int ret; + + /* TSQ includes SQ doorbell and ack doorbell */ + ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE); + if (ret) { + dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret); + goto err_tpq_init_failed; + } + + return 0; + +err_tpq_init_failed: + hns_roce_free_link_table(hr_dev, &priv->tsq); + + return ret; +} + +static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + + hns_roce_free_link_table(hr_dev, &priv->tpq); + hns_roce_free_link_table(hr_dev, &priv->tsq); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); @@ -1307,13 +1575,45 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev, + int gid_index, const union ib_gid *gid, + enum hns_roce_sgid_type sgid_type) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_sgid_tb *sgid_tb = + (struct hns_roce_cfg_sgid_tb *)desc.data; + u32 *p; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); + + roce_set_field(sgid_tb->table_idx_rsv, + CFG_SGID_TB_TABLE_IDX_M, + CFG_SGID_TB_TABLE_IDX_S, gid_index); + roce_set_field(sgid_tb->vf_sgid_type_rsv, + CFG_SGID_TB_VF_SGID_TYPE_M, + CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); + + p = (u32 *)&gid->raw[0]; + sgid_tb->vf_sgid_l = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[4]; + sgid_tb->vf_sgid_ml = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[8]; + sgid_tb->vf_sgid_mh = cpu_to_le32(*p); + + p = (u32 *)&gid->raw[0xc]; + sgid_tb->vf_sgid_h = cpu_to_le32(*p); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, - int gid_index, union ib_gid *gid, + int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr) { enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; - u32 *p; - u32 val; + int ret; if (!gid || !attr) return -EINVAL; @@ -1328,49 +1628,37 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; } - p = (u32 *)&gid->raw[0]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[4]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[8]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG + - 0x20 * gid_index); - - p = (u32 *)&gid->raw[0xc]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG + - 0x20 * gid_index); - - val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index); - roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M, - ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type); - - roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val); + ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type); + if (ret) + dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret); - return 0; + return ret; } static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) { + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_smac_tb *smac_tb = + (struct hns_roce_cfg_smac_tb *)desc.data; u16 reg_smac_h; u32 reg_smac_l; - u32 val; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false); reg_smac_l = *(u32 *)(&addr[0]); - roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG + - 0x08 * phy_port); - val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port); + reg_smac_h = *(u16 *)(&addr[4]); - reg_smac_h = *(u16 *)(&addr[4]); - roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M, - ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h); - roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); + memset(smac_tb, 0, sizeof(*smac_tb)); + roce_set_field(smac_tb->tb_idx_rsv, + CFG_SMAC_TB_IDX_M, + CFG_SMAC_TB_IDX_S, phy_port); + roce_set_field(smac_tb->vf_smac_h_rsv, + CFG_SMAC_TB_VF_SMAC_H_M, + CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); + smac_tb->vf_smac_l = reg_smac_l; - return 0; + return hns_roce_cmq_send(hr_dev, &desc, 1); } static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, @@ -1758,6 +2046,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; struct hns_roce_wq *wq; + struct ib_qp_attr attr; + int attr_mask; int is_send; u16 wqe_ctr; u32 opcode; @@ -1844,8 +2134,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, break; } - /* CQE status error, directly return */ - if (wc->status != IB_WC_SUCCESS) + /* flush cqe if wc status is error, excluding flush error */ + if ((wc->status != IB_WC_SUCCESS) && + (wc->status != IB_WC_WR_FLUSH_ERR)) { + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp, + &attr, attr_mask, + (*cur_qp)->state, IB_QPS_ERR); + } + + if (wc->status == IB_WC_WR_FLUSH_ERR) return 0; if (is_send) { @@ -1931,7 +2230,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->immtdata; + wc->ex.imm_data = + cpu_to_be32(le32_to_cpu(cqe->immtdata)); break; case HNS_ROCE_V2_OPCODE_SEND: wc->opcode = IB_WC_RECV; @@ -1940,7 +2240,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM: wc->opcode = IB_WC_RECV; wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cqe->immtdata; + wc->ex.imm_data = + cpu_to_be32(le32_to_cpu(cqe->immtdata)); break; case HNS_ROCE_V2_OPCODE_SEND_WITH_INV: wc->opcode = IB_WC_RECV; @@ -2273,10 +2574,10 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); /* No VLAN need to set 0xFFF */ - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, - V2_QPC_BYTE_24_VLAN_IDX_S, 0xfff); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, - V2_QPC_BYTE_24_VLAN_IDX_S, 0); + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0xfff); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0); /* * Set some fields in context to zero, Because the default values @@ -2886,21 +3187,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, grh->flow_label); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, 0); - - roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, grh->traffic_class); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, 0); - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); @@ -2911,9 +3197,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, 0); - memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); - roce_set_field(context->byte_84_rq_ci_pi, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); @@ -2952,12 +3235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_168_LP_SGEN_INI_M, V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - return 0; } @@ -3135,13 +3412,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_28_AT_S, 0); } - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, - rdma_ah_get_sl(&attr->ah_attr)); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, @@ -3224,9 +3494,114 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, ; } else { dev_err(dev, "Illegal state for QP!\n"); + ret = -EINVAL; goto out; } + /* When QP state is err, SQ and RQ WQE should be flushed */ + if (new_state == IB_QPS_ERR) { + roce_set_field(context->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, + hr_qp->sq.head); + roce_set_field(qpc_mask->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); + roce_set_field(context->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, + hr_qp->rq.head); + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + } + + if (attr_mask & IB_QP_AV) { + const struct ib_global_route *grh = + rdma_ah_read_grh(&attr->ah_attr); + const struct ib_gid_attr *gid_attr = NULL; + u8 src_mac[ETH_ALEN]; + int is_roce_protocol; + u16 vlan = 0xffff; + u8 ib_port; + u8 hr_port; + + ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : + hr_qp->port + 1; + hr_port = ib_port - 1; + is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) && + rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; + + if (is_roce_protocol) { + gid_attr = attr->ah_attr.grh.sgid_attr; + vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + } + + roce_set_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, vlan); + roce_set_field(qpc_mask->byte_24_mtu_tc, + V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0); + + if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) { + dev_err(hr_dev->dev, + "sgid_index(%u) too large. max is %d\n", + grh->sgid_index, + hr_dev->caps.gid_table_len[hr_port]); + ret = -EINVAL; + goto out; + } + + if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) { + dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n"); + ret = -EINVAL; + goto out; + } + + roce_set_field(context->byte_52_udpspn_dmac, + V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, + (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ? + 0 : 0x12b7); + + roce_set_field(qpc_mask->byte_52_udpspn_dmac, + V2_QPC_BYTE_52_UDPSPN_M, + V2_QPC_BYTE_52_UDPSPN_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index); + + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, + V2_QPC_BYTE_20_SGID_IDX_S, 0); + + roce_set_field(context->byte_24_mtu_tc, + V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); + roce_set_field(qpc_mask->byte_24_mtu_tc, + V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, 0); + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, grh->flow_label); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, 0); + memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); + memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, + rdma_ah_get_sl(&attr->ah_attr)); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, 0); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + } + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); @@ -3497,6 +3872,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); if (is_user) { + if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) + hns_roce_db_unmap_user( + to_hr_ucontext(hr_qp->ibqp.uobject->context), + &hr_qp->sdb); + if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) hns_roce_db_unmap_user( to_hr_ucontext(hr_qp->ibqp.uobject->context), @@ -3579,6 +3959,74 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) return ret; } +static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn) +{ + struct hns_roce_qp *hr_qp; + struct ib_qp_attr attr; + int attr_mask; + int ret; + + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (!hr_qp) { + dev_warn(hr_dev->dev, "no hr_qp can be found!\n"); + return; + } + + if (hr_qp->ibqp.uobject) { + if (hr_qp->sdb_en == 1) { + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + } else { + dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n"); + return; + } + } + + attr_mask = IB_QP_STATE; + attr.qp_state = IB_QPS_ERR; + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask, + hr_qp->state, IB_QPS_ERR); + if (ret) + dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n", + qpn); +} + +static void hns_roce_irq_work_handle(struct work_struct *work) +{ + struct hns_roce_work *irq_work = + container_of(work, struct hns_roce_work, work); + u32 qpn = irq_work->qpn; + + switch (irq_work->event_type) { + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); + break; + default: + break; + } + + kfree(irq_work); +} + +static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq, u32 qpn) +{ + struct hns_roce_work *irq_work; + + irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC); + if (!irq_work) + return; + + INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); + irq_work->hr_dev = hr_dev; + irq_work->qpn = qpn; + irq_work->event_type = eq->event_type; + irq_work->sub_type = eq->sub_type; + queue_work(hr_dev->irq_workq, &(irq_work->work)); +} + static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { u32 doorbell[2]; @@ -3681,14 +4129,9 @@ static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe, - int event_type) + int event_type, u32 qpn) { struct device *dev = hr_dev->dev; - u32 qpn; - - qpn = roce_get_field(aeqe->event.qp_event.qp, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_COMM_EST: @@ -3715,14 +4158,9 @@ static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe, - int event_type) + int event_type, u32 cqn) { struct device *dev = hr_dev->dev; - u32 cqn; - - cqn = roce_get_field(aeqe->event.cq_event.cq, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: @@ -3787,6 +4225,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_aeqe *aeqe; int aeqe_found = 0; int event_type; + int sub_type; + u32 qpn; + u32 cqn; while ((aeqe = next_aeqe_sw_v2(eq))) { @@ -3798,6 +4239,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, event_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_EVENT_TYPE_M, HNS_ROCE_V2_AEQE_EVENT_TYPE_S); + sub_type = roce_get_field(aeqe->asyn, + HNS_ROCE_V2_AEQE_SUB_TYPE_M, + HNS_ROCE_V2_AEQE_SUB_TYPE_S); + qpn = roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + cqn = roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -3811,7 +4261,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type); + hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, + qpn); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: @@ -3820,7 +4271,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type); + hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, + cqn); break; case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: dev_warn(dev, "DB overflow.\n"); @@ -3843,6 +4295,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, break; }; + eq->event_type = event_type; + eq->sub_type = sub_type; ++eq->cons_index; aeqe_found = 1; @@ -3850,6 +4304,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, dev_warn(dev, "cons_index overflow, set back to 0.\n"); eq->cons_index = 0; } + hns_roce_v2_init_irq_work(hr_dev, eq, qpn); } set_eq_cons_index_v2(eq); @@ -4052,15 +4507,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, u32 bt_chk_sz; u32 mhop_num; int eqe_alloc; - int ba_num; int i = 0; int j = 0; mhop_num = hr_dev->caps.eqe_hop_num; buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); - ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) / - buf_chk_sz; /* hop_num = 0 */ if (mhop_num == HNS_ROCE_HOP_NUM_0) { @@ -4669,6 +5121,13 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) } } + hr_dev->irq_workq = + create_singlethread_workqueue("hns_roce_irq_workqueue"); + if (!hr_dev->irq_workq) { + dev_err(dev, "Create irq workqueue failed!\n"); + goto err_request_irq_fail; + } + return 0; err_request_irq_fail: @@ -4719,12 +5178,17 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(hr_dev->irq_names[i]); kfree(eq_table->eq); + + flush_workqueue(hr_dev->irq_workq); + destroy_workqueue(hr_dev->irq_workq); } static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, .hw_profile = hns_roce_v2_profile, + .hw_init = hns_roce_v2_init, + .hw_exit = hns_roce_v2_exit, .post_mbox = hns_roce_v2_post_mbox, .chk_mbox = hns_roce_v2_chk_mbox, .set_gid = hns_roce_v2_set_gid, @@ -4749,6 +5213,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, /* required last entry */ {0, } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index d47675f365c7..14aa308befef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -112,6 +112,9 @@ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define CMD_CSQ_DESC_NUM 1024 +#define CMD_CRQ_DESC_NUM 1024 + enum { NO_ARMED = 0x0, REG_NXT_CEQE = 0x2, @@ -203,6 +206,10 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, + HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, + HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, + HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, + HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, }; @@ -447,8 +454,8 @@ struct hns_roce_v2_qp_context { #define V2_QPC_BYTE_24_TC_S 8 #define V2_QPC_BYTE_24_TC_M GENMASK(15, 8) -#define V2_QPC_BYTE_24_VLAN_IDX_S 16 -#define V2_QPC_BYTE_24_VLAN_IDX_M GENMASK(27, 16) +#define V2_QPC_BYTE_24_VLAN_ID_S 16 +#define V2_QPC_BYTE_24_VLAN_ID_M GENMASK(27, 16) #define V2_QPC_BYTE_24_MTU_S 28 #define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28) @@ -768,7 +775,7 @@ struct hns_roce_v2_cqe { __le32 byte_4; union { __le32 rkey; - __be32 immtdata; + __le32 immtdata; }; __le32 byte_12; __le32 byte_16; @@ -926,7 +933,7 @@ struct hns_roce_v2_cq_db { struct hns_roce_v2_ud_send_wqe { __le32 byte_4; __le32 msg_len; - __be32 immtdata; + __le32 immtdata; __le32 byte_16; __le32 byte_20; __le32 byte_24; @@ -1012,7 +1019,7 @@ struct hns_roce_v2_rc_send_wqe { __le32 msg_len; union { __le32 inv_key; - __be32 immtdata; + __le32 immtdata; }; __le32 byte_16; __le32 byte_20; @@ -1061,6 +1068,40 @@ struct hns_roce_query_version { __le32 rsv[5]; }; +struct hns_roce_cfg_llm_a { + __le32 base_addr_l; + __le32 base_addr_h; + __le32 depth_pgsz_init_en; + __le32 head_ba_l; + __le32 head_ba_h_nxtptr; + __le32 head_ptr; +}; + +#define CFG_LLM_QUE_DEPTH_S 0 +#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0) + +#define CFG_LLM_QUE_PGSZ_S 16 +#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16) + +#define CFG_LLM_INIT_EN_S 20 +#define CFG_LLM_INIT_EN_M GENMASK(20, 20) + +#define CFG_LLM_HEAD_PTR_S 0 +#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0) + +struct hns_roce_cfg_llm_b { + __le32 tail_ba_l; + __le32 tail_ba_h; + __le32 tail_ptr; + __le32 rsv[3]; +}; + +#define CFG_LLM_TAIL_BA_H_S 0 +#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0) + +#define CFG_LLM_TAIL_PTR_S 0 +#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0) + struct hns_roce_cfg_global_param { __le32 time_cfg_udp_port; __le32 rsv[5]; @@ -1072,7 +1113,7 @@ struct hns_roce_cfg_global_param { #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16 #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16) -struct hns_roce_pf_res { +struct hns_roce_pf_res_a { __le32 rsv; __le32 qpc_bt_idx_num; __le32 srqc_bt_idx_num; @@ -1111,6 +1152,32 @@ struct hns_roce_pf_res { #define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16 #define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16) +struct hns_roce_pf_res_b { + __le32 rsv0; + __le32 smac_idx_num; + __le32 sgid_idx_num; + __le32 qid_idx_sl_num; + __le32 rsv[2]; +}; + +#define PF_RES_DATA_1_PF_SMAC_IDX_S 0 +#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_1_PF_SMAC_NUM_S 8 +#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_2_PF_SGID_IDX_S 0 +#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0) + +#define PF_RES_DATA_2_PF_SGID_NUM_S 8 +#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8) + +#define PF_RES_DATA_3_PF_QID_IDX_S 0 +#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0) + +#define PF_RES_DATA_3_PF_SL_NUM_S 16 +#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16) + struct hns_roce_vf_res_a { __le32 vf_id; __le32 vf_qpc_bt_idx_num; @@ -1179,13 +1246,6 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) -/* Reg field definition */ -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0 -#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0) - -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0 -#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0) - struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1230,6 +1290,32 @@ struct hns_roce_cfg_bt_attr { #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8 #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8) +struct hns_roce_cfg_sgid_tb { + __le32 table_idx_rsv; + __le32 vf_sgid_l; + __le32 vf_sgid_ml; + __le32 vf_sgid_mh; + __le32 vf_sgid_h; + __le32 vf_sgid_type_rsv; +}; +#define CFG_SGID_TB_TABLE_IDX_S 0 +#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) + +#define CFG_SGID_TB_VF_SGID_TYPE_S 0 +#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) + +struct hns_roce_cfg_smac_tb { + __le32 tb_idx_rsv; + __le32 vf_smac_l; + __le32 vf_smac_h_rsv; + __le32 rsv[3]; +}; +#define CFG_SMAC_TB_IDX_S 0 +#define CFG_SMAC_TB_IDX_M GENMASK(7, 0) + +#define CFG_SMAC_TB_VF_SMAC_H_S 0 +#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) + struct hns_roce_cmq_desc { __le16 opcode; __le16 flag; @@ -1276,8 +1362,32 @@ struct hns_roce_v2_cmq { u16 last_status; }; +enum hns_roce_link_table_type { + TSQ_LINK_TABLE, + TPQ_LINK_TABLE, +}; + +struct hns_roce_link_table { + struct hns_roce_buf_list table; + struct hns_roce_buf_list *pg_list; + u32 npages; + u32 pg_sz; +}; + +struct hns_roce_link_table_entry { + u32 blk_ba0; + u32 blk_ba1_nxt_ptr; +}; +#define HNS_ROCE_LINK_TABLE_BA1_S 0 +#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0) + +#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20 +#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20) + struct hns_roce_v2_priv { struct hns_roce_v2_cmq cmq; + struct hns_roce_link_table tsq; + struct hns_roce_link_table tpq; }; struct hns_roce_eq_context { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 21b901cfa2d6..c5cae9a38c04 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) return hr_dev->hw->set_mac(hr_dev, phy_port, addr); } -static int hns_roce_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, void **context) +static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); u8 port = attr->port_num - 1; @@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid, spin_lock_irqsave(&hr_dev->iboe.lock, flags); - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, - (union ib_gid *)gid, attr); + ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr); spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); @@ -208,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_qp_wr = hr_dev->caps.max_wqes; props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_RC_RNR_NAK_GEN; - props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg); + props->max_send_sge = hr_dev->caps.max_sq_sg; + props->max_recv_sge = hr_dev->caps.max_rq_sg; props->max_sge_rd = 1; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; @@ -535,6 +534,9 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); + ib_dev->uverbs_ex_cmd_mask |= + (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + /* HCA||device||port */ ib_dev->modify_device = hns_roce_modify_device; ib_dev->query_device = hns_roce_query_device; @@ -887,8 +889,7 @@ error_failed_cmd_init: error_failed_cmq_init: if (hr_dev->hw->reset) { - ret = hr_dev->hw->reset(hr_dev, false); - if (ret) + if (hr_dev->hw->reset(hr_dev, false)) dev_err(dev, "Dereset RoCE engine failed!\n"); } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b9f2c871ff9a..e11c149da04d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -37,7 +37,7 @@ static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) { - return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn); + return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn) ? -ENOMEM : 0; } static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index baaf906f7c2e..efb7e961ca65 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -115,7 +115,10 @@ static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base); + return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, + base) ? + -ENOMEM : + 0; } enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) @@ -489,6 +492,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, return 0; } +static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return 1; +} + static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || @@ -613,6 +624,23 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_mtt; } + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && + (udata->inlen >= sizeof(ucmd)) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_sq(init_attr)) { + ret = hns_roce_db_map_user( + to_hr_ucontext(ib_pd->uobject->context), + ucmd.sdb_addr, &hr_qp->sdb); + if (ret) { + dev_err(dev, "sq record doorbell map failed!\n"); + goto err_mtt; + } + + /* indicate kernel supports sq record db */ + resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; + hr_qp->sdb_en = 1; + } + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_rq(init_attr)) { @@ -621,7 +649,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ucmd.db_addr, &hr_qp->rdb); if (ret) { dev_err(dev, "rq record doorbell map failed!\n"); - goto err_mtt; + goto err_sq_dbmap; } } } else { @@ -734,7 +762,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { - /* indicate kernel supports record db */ + /* indicate kernel supports rq record db */ resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) @@ -770,6 +798,16 @@ err_wrid: kfree(hr_qp->rq.wrid); } +err_sq_dbmap: + if (ib_pd->uobject) + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && + (udata->inlen >= sizeof(ucmd)) && + (udata->outlen >= sizeof(resp)) && + hns_roce_qp_has_sq(init_attr)) + hns_roce_db_unmap_user( + to_hr_ucontext(ib_pd->uobject->context), + &hr_qp->sdb); + err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); @@ -903,6 +941,17 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + if (ibqp->uobject && + (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { + if (hr_qp->sdb_en == 1) { + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); + } else { + dev_warn(dev, "flush cqe is not supported in userspace!\n"); + goto out; + } + } + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, IB_LINK_LAYER_ETHERNET)) { dev_err(dev, "ib_modify_qp_is_ok failed\n"); diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig index 2962979c06e9..d867ef1ac72a 100644 --- a/drivers/infiniband/hw/i40iw/Kconfig +++ b/drivers/infiniband/hw/i40iw/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_I40IW tristate "Intel(R) Ethernet X722 iWARP Driver" depends on INET && I40E + depends on IPV6 || !IPV6 depends on PCI select GENERIC_ALLOCATOR ---help--- diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 7b2655128b9f..423818a7d333 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -57,6 +57,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/ip_fib.h> +#include <net/secure_seq.h> #include <net/tcp.h> #include <asm/checksum.h> @@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node( struct i40iw_cm_listener *listener) { struct i40iw_cm_node *cm_node; - struct timespec ts; int oldarpindex; int arpindex; struct net_device *netdev = iwdev->netdev; @@ -2214,10 +2214,26 @@ static struct i40iw_cm_node *i40iw_make_cm_node( cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; - cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) : - (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6); + if (cm_node->ipv4) { + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]), + htonl(cm_node->rem_addr[0]), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4; + } else if (IS_ENABLED(CONFIG_IPV6)) { + __be32 loc[4] = { + htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]), + htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3]) + }; + __be32 rem[4] = { + htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]), + htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3]) + }; + cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem, + htons(cm_node->loc_port), + htons(cm_node->rem_port)); + cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6; + } cm_node->iwdev = iwdev; cm_node->dev = &iwdev->sc_dev; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 2836c5420d60..55a1fbf0e670 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -435,45 +435,24 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) } /** - * i40iw_manage_apbvt - add or delete tcp port + * i40iw_cqp_manage_abvpt_cmd - send cqp command manage abpvt * @iwdev: iwarp device * @accel_local_port: port for apbvt * @add_port: add or delete port */ -int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port) +static enum i40iw_status_code +i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) { struct i40iw_apbvt_info *info; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; - unsigned long flags; - struct i40iw_cm_core *cm_core = &iwdev->cm_core; - enum i40iw_status_code status = 0; - bool in_use; - - /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to - * protect against race where add APBVT CQP can race ahead of the delete - * APBVT for same port. - */ - spin_lock_irqsave(&cm_core->apbvt_lock, flags); - - if (!add_port) { - in_use = i40iw_port_in_use(cm_core, accel_local_port); - if (in_use) - goto exit; - clear_bit(accel_local_port, cm_core->ports_in_use); - } else { - in_use = test_and_set_bit(accel_local_port, - cm_core->ports_in_use); - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); - if (in_use) - return 0; - } + enum i40iw_status_code status; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port); - if (!cqp_request) { - status = -ENOMEM; - goto exit; - } + if (!cqp_request) + return I40IW_ERR_NO_MEMORY; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -489,14 +468,54 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad status = i40iw_handle_cqp_op(iwdev, cqp_request); if (status) i40iw_pr_err("CQP-OP Manage APBVT entry fail"); -exit: - if (!add_port) - spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); return status; } /** + * i40iw_manage_apbvt - add or delete tcp port + * @iwdev: iwarp device + * @accel_local_port: port for apbvt + * @add_port: add or delete port + */ +enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, + u16 accel_local_port, + bool add_port) +{ + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + enum i40iw_status_code status; + unsigned long flags; + bool in_use; + + /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to + * protect against race where add APBVT CQP can race ahead of the delete + * APBVT for same port. + */ + if (add_port) { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = __test_and_set_bit(accel_local_port, + cm_core->ports_in_use); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + if (in_use) + return 0; + return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + true); + } else { + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + in_use = i40iw_port_in_use(cm_core, accel_local_port); + if (in_use) { + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return 0; + } + __clear_bit(accel_local_port, cm_core->ports_in_use); + status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port, + false); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return status; + } +} + +/** * i40iw_manage_arp_cache - manage hw arp cache * @iwdev: iwarp device * @mac_addr: mac address ptr diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 68679ad4c6da..e2e6c74a7452 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; props->max_qp = iwdev->max_qp - iwdev->used_qps; props->max_qp_wr = I40IW_MAX_QP_WRS; - props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; props->max_cq = iwdev->max_cq - iwdev->used_cqs; props->max_cqe = iwdev->max_cqe; props->max_mr = iwdev->max_mr - iwdev->used_mrs; @@ -1409,6 +1410,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) struct vm_area_struct *vma; struct hstate *h; + down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); if (vma && is_vm_hugetlb_page(vma)) { h = hstate_vma(vma); @@ -1417,6 +1419,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr) iwmr->page_msk = huge_page_mask(h); } } + up_read(¤t->mm->mmap_sem); } /** @@ -2198,8 +2201,8 @@ static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, in * @bad_wr: return of bad wr if err */ static int i40iw_post_send(struct ib_qp *ibqp, - struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *ib_wr, + const struct ib_send_wr **bad_wr) { struct i40iw_qp *iwqp; struct i40iw_qp_uk *ukqp; @@ -2374,9 +2377,8 @@ out: * @ib_wr: work request for receive * @bad_wr: bad wr caused an error */ -static int i40iw_post_recv(struct ib_qp *ibqp, - struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr) +static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, + const struct ib_recv_wr **bad_wr) { struct i40iw_qp *iwqp; struct i40iw_qp_uk *ukqp; @@ -2701,21 +2703,6 @@ static int i40iw_query_gid(struct ib_device *ibdev, } /** - * i40iw_modify_port Modify port properties - * @ibdev: device pointer from stack - * @port: port number - * @port_modify_mask: mask for port modifications - * @props: port properties - */ -static int i40iw_modify_port(struct ib_device *ibdev, - u8 port, - int port_modify_mask, - struct ib_port_modify *props) -{ - return -ENOSYS; -} - -/** * i40iw_query_pkey - Query partition key * @ibdev: device pointer from stack * @port: port number @@ -2732,28 +2719,6 @@ static int i40iw_query_pkey(struct ib_device *ibdev, } /** - * i40iw_create_ah - create address handle - * @ibpd: ptr of pd - * @ah_attr: address handle attributes - */ -static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd, - struct rdma_ah_attr *attr, - struct ib_udata *udata) - -{ - return ERR_PTR(-ENOSYS); -} - -/** - * i40iw_destroy_ah - Destroy address handle - * @ah: pointer to address handle - */ -static int i40iw_destroy_ah(struct ib_ah *ah) -{ - return -ENOSYS; -} - -/** * i40iw_get_vector_affinity - report IRQ affinity mask * @ibdev: IB device * @comp_vector: completion vector index @@ -2820,7 +2785,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; iwibdev->ibdev.query_port = i40iw_query_port; - iwibdev->ibdev.modify_port = i40iw_modify_port; iwibdev->ibdev.query_pkey = i40iw_query_pkey; iwibdev->ibdev.query_gid = i40iw_query_gid; iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; @@ -2840,8 +2804,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; iwibdev->ibdev.query_device = i40iw_query_device; - iwibdev->ibdev.create_ah = i40iw_create_ah; - iwibdev->ibdev.destroy_ah = i40iw_destroy_ah; iwibdev->ibdev.drain_sq = i40iw_drain_sq; iwibdev->ibdev.drain_rq = i40iw_drain_rq; iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 9345d5b546d1..e9e3a6f390db 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct mlx4_ib_ah *ah) { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + const struct ib_gid_attr *gid_attr; struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; u16 vlan_tag = 0xffff; - union ib_gid sgid; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); int ret; @@ -96,25 +95,30 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, is_mcast = 1; memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); - ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &gid_attr); - if (ret) - return ERR_PTR(ret); eth_zero_addr(ah->av.eth.s_mac); - if (is_vlan_dev(gid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); - memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); + + /* + * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave + * and we are directly creating an AV for a slave's gid_index. + */ + gid_attr = ah_attr->grh.sgid_attr; + if (gid_attr) { + if (is_vlan_dev(gid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); + memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN); + ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr); + if (ret < 0) + return ERR_PTR(ret); + ah->av.eth.gid_index = ret; + } else { + /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */ + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + } + if (vlan_tag < 0x1000) vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (rdma_ah_get_port_num(ah_attr) << 24)); - ret = mlx4_ib_gid_index_to_real_index(ibdev, - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); - if (ret < 0) - return ERR_PTR(ret); - ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = grh->hop_limit; if (rdma_ah_get_static_rate(ah_attr)) { @@ -173,6 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, return create_ib_ah(pd, ah_attr, ah); /* never fails */ } +/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. */ +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag) +{ + struct rdma_ah_attr slave_attr = *ah_attr; + struct mlx4_ib_ah *mah; + struct ib_ah *ah; + + slave_attr.grh.sgid_attr = NULL; + slave_attr.grh.sgid_index = slave_sgid_index; + ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + if (IS_ERR(ah)) + return ah; + + ah->device = pd->device; + ah->pd = pd; + ah->type = ah_attr->type; + mah = to_mah(ah); + + /* get rid of force-loopback bit */ + mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); + + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) + memcpy(mah->av.eth.s_mac, s_mac, 6); + + if (vlan_tag < 0x1000) + vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; + mah->av.eth.vlan = cpu_to_be16(vlan_tag); + + return ah; +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 90a3e2642c2e..e5466d786bb1 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -506,7 +506,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, { struct ib_sge list; struct ib_ud_wr wr; - struct ib_send_wr *bad_wr; + const struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *tun_ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_rcv_tunnel_mad *tun_mad; @@ -1310,7 +1310,8 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, int index) { struct ib_sge sg_list; - struct ib_recv_wr recv_wr, *bad_recv_wr; + struct ib_recv_wr recv_wr; + const struct ib_recv_wr *bad_recv_wr; int size; size = (tun_qp->qp->qp_type == IB_QPT_UD) ? @@ -1361,19 +1362,16 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, { struct ib_sge list; struct ib_ud_wr wr; - struct ib_send_wr *bad_wr; + const struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *sqp_ctx; struct mlx4_ib_demux_pv_qp *sqp; struct mlx4_mad_snd_buf *sqp_mad; struct ib_ah *ah; struct ib_qp *send_qp = NULL; - struct ib_global_route *grh; unsigned wire_tx_ix = 0; int ret = 0; u16 wire_pkey_ix; int src_qpnum; - u8 sgid_index; - sqp_ctx = dev->sriov.sqps[port-1]; @@ -1394,16 +1392,11 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, send_qp = sqp->qp; /* create ah */ - grh = rdma_ah_retrieve_grh(attr); - sgid_index = grh->sgid_index; - grh->sgid_index = 0; - ah = rdma_create_ah(sqp_ctx->pd, attr); + ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr, + rdma_ah_retrieve_grh(attr)->sgid_index, + s_mac, vlan_id); if (IS_ERR(ah)) return -ENOMEM; - grh->sgid_index = sgid_index; - to_mah(ah)->av.ib.gid_index = sgid_index; - /* get rid of force-loopback bit */ - to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= (MLX4_NUM_TUNNEL_BUFS - 1)) @@ -1445,12 +1438,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.wr.num_sge = 1; wr.wr.opcode = IB_WR_SEND; wr.wr.send_flags = IB_SEND_SIGNALED; - if (s_mac) - memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); - if (vlan_id < 0x1000) - vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13; - to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); - ret = ib_post_send(send_qp, &wr.wr, &bad_wr); if (!ret) @@ -1461,7 +1448,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - rdma_destroy_ah(ah); + mlx4_ib_destroy_ah(ah); return ret; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4ec519afc45b..ca0f1ee26091 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } -static int mlx4_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) { struct mlx4_ib_dev *ibdev = to_mdev(attr->device); struct mlx4_ib_iboe *iboe = &ibdev->iboe; @@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { - if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) && - (port_gid_table->gids[i].gid_type == attr->gid_type)) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + port_gid_table->gids[i].gid_type == attr->gid_type) { found = i; break; } @@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, ret = -ENOMEM; } else { *context = port_gid_table->gids[free].ctx; - memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); + memcpy(&port_gid_table->gids[free].gid, + &attr->gid, sizeof(attr->gid)); port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; @@ -380,17 +380,15 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) } int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index) + const struct ib_gid_attr *attr) { struct mlx4_ib_iboe *iboe = &ibdev->iboe; struct gid_cache_context *ctx = NULL; - union ib_gid gid; struct mlx4_port_gid_table *port_gid_table; int real_index = -EINVAL; int i; - int ret; unsigned long flags; - struct ib_gid_attr attr; + u8 port_num = attr->port_num; if (port_num > MLX4_MAX_PORTS) return -EINVAL; @@ -399,21 +397,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, port_num = 1; if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) - return index; - - ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - if (attr.ndev) - dev_put(attr.ndev); + return attr->index; spin_lock_irqsave(&iboe->lock, flags); port_gid_table = &iboe->gids[port_num - 1]; for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) - if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) && - attr.gid_type == port_gid_table->gids[i].gid_type) { + if (!memcmp(&port_gid_table->gids[i].gid, + &attr->gid, sizeof(attr->gid)) && + attr->gid_type == port_gid_table->gids[i].gid_type) { ctx = port_gid_table->gids[i].ctx; break; } @@ -525,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->quotas.qp; props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; - props->max_sge = min(dev->dev->caps.max_sq_sg, - dev->dev->caps.max_rq_sg); + props->max_send_sge = dev->dev->caps.max_sq_sg; + props->max_recv_sge = dev->dev->caps.max_rq_sg; props->max_sge_rd = MLX4_MAX_SGE_RD; props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; @@ -770,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, IB_WIDTH_4X : IB_WIDTH_1X; props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? IB_SPEED_FDR : IB_SPEED_QDR; - props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; @@ -2709,6 +2702,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; ibdev->ib_dev.query_qp = mlx4_ib_query_qp; ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; + ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; + ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; ibdev->ib_dev.post_send = mlx4_ib_post_send; ibdev->ib_dev.post_recv = mlx4_ib_post_recv; ibdev->ib_dev.create_cq = mlx4_ib_create_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7b1429917aba..e10dccc7958f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -322,7 +322,6 @@ struct mlx4_ib_qp { u32 doorbell_qpn; __be32 sq_signal_bits; unsigned sq_next_wqe; - int sq_max_wqes_per_wr; int sq_spare_wqes; struct mlx4_ib_wq sq; @@ -760,6 +759,10 @@ void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); +struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, + struct rdma_ah_attr *ah_attr, + int slave_sgid_index, u8 *s_mac, + u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); @@ -771,21 +774,23 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int mlx4_ib_destroy_srq(struct ib_srq *srq); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); -int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_destroy_qp(struct ib_qp *qp); +void mlx4_ib_drain_sq(struct ib_qp *qp); +void mlx4_ib_drain_rq(struct ib_qp *qp); int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, @@ -900,7 +905,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, - u8 port_num, int index); + const struct ib_gid_attr *attr); void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, int port); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 3b8045fd23ed..6dd3cd2c2f80 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -204,91 +204,26 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) /* * Stamp a SQ WQE so that it is invalid if prefetched by marking the - * first four bytes of every 64 byte chunk with - * 0x7FFFFFF | (invalid_ownership_value << 31). - * - * When the max work request size is less than or equal to the WQE - * basic block size, as an optimization, we can stamp all WQEs with - * 0xffffffff, and skip the very first chunk of each WQE. + * first four bytes of every 64 byte chunk with 0xffffffff, except for + * the very first chunk of the WQE. */ -static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) +static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n) { __be32 *wqe; int i; int s; - int ind; void *buf; - __be32 stamp; struct mlx4_wqe_ctrl_seg *ctrl; - if (qp->sq_max_wqes_per_wr > 1) { - s = roundup(size, 1U << qp->sq.wqe_shift); - for (i = 0; i < s; i += 64) { - ind = (i >> qp->sq.wqe_shift) + n; - stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : - cpu_to_be32(0xffffffff); - buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1)); - *wqe = stamp; - } - } else { - ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; - for (i = 64; i < s; i += 64) { - wqe = buf + i; - *wqe = cpu_to_be32(0xffffffff); - } + buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); + ctrl = (struct mlx4_wqe_ctrl_seg *)buf; + s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; + for (i = 64; i < s; i += 64) { + wqe = buf + i; + *wqe = cpu_to_be32(0xffffffff); } } -static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) -{ - struct mlx4_wqe_ctrl_seg *ctrl; - struct mlx4_wqe_inline_seg *inl; - void *wqe; - int s; - - ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = sizeof(struct mlx4_wqe_ctrl_seg); - - if (qp->ibqp.qp_type == IB_QPT_UD) { - struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl; - struct mlx4_av *av = (struct mlx4_av *)dgram->av; - memset(dgram, 0, sizeof *dgram); - av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn); - s += sizeof(struct mlx4_wqe_datagram_seg); - } - - /* Pad the remainder of the WQE with an inline data segment. */ - if (size > s) { - inl = wqe + s; - inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); - } - ctrl->srcrb_flags = 0; - ctrl->qpn_vlan.fence_size = size / 16; - /* - * Make sure descriptor is fully written before setting ownership bit - * (because HW can start executing as soon as we do). - */ - wmb(); - - ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) | - (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); - - stamp_send_wqe(qp, n + qp->sq_spare_wqes, size); -} - -/* Post NOP WQE to prevent wrap-around in the middle of WR */ -static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) -{ - unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1)); - if (unlikely(s < qp->sq_max_wqes_per_wr)) { - post_nop_wqe(qp, ind, s << qp->sq.wqe_shift); - ind += s; - } - return ind; -} - static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) { struct ib_event event; @@ -433,8 +368,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp, - bool shrink_wqe) + enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) { int s; @@ -461,70 +395,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, if (s > dev->dev->caps.max_sq_desc_sz) return -EINVAL; + qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); + /* - * Hermon supports shrinking WQEs, such that a single work - * request can include multiple units of 1 << wqe_shift. This - * way, work requests can differ in size, and do not have to - * be a power of 2 in size, saving memory and speeding up send - * WR posting. Unfortunately, if we do this then the - * wqe_index field in CQEs can't be used to look up the WR ID - * anymore, so we do this only if selective signaling is off. - * - * Further, on 32-bit platforms, we can't use vmap() to make - * the QP buffer virtually contiguous. Thus we have to use - * constant-sized WRs to make sure a WR is always fully within - * a single page-sized chunk. - * - * Finally, we use NOP work requests to pad the end of the - * work queue, to avoid wrap-around in the middle of WR. We - * set NEC bit to avoid getting completions with error for - * these NOP WRs, but since NEC is only supported starting - * with firmware 2.2.232, we use constant-sized WRs for older - * firmware. - * - * And, since MLX QPs only support SEND, we use constant-sized - * WRs in this case. - * - * We look for the smallest value of wqe_shift such that the - * resulting number of wqes does not exceed device - * capabilities. - * - * We set WQE size to at least 64 bytes, this way stamping - * invalidates each WQE. + * We need to leave 2 KB + 1 WR of headroom in the SQ to + * allow HW to prefetch. */ - if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && - qp->sq_signal_bits && BITS_PER_LONG == 64 && - type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && - !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) - qp->sq.wqe_shift = ilog2(64); - else - qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); - - for (;;) { - qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift); - - /* - * We need to leave 2 KB + 1 WR of headroom in the SQ to - * allow HW to prefetch. - */ - qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr; - qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr * - qp->sq_max_wqes_per_wr + - qp->sq_spare_wqes); - - if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes) - break; - - if (qp->sq_max_wqes_per_wr <= 1) - return -EINVAL; - - ++qp->sq.wqe_shift; - } - - qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz, - (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) - - send_wqe_overhead(type, qp->flags)) / + qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + + qp->sq_spare_wqes); + + qp->sq.max_gs = + (min(dev->dev->caps.max_sq_desc_sz, + (1 << qp->sq.wqe_shift)) - + send_wqe_overhead(type, qp->flags)) / sizeof (struct mlx4_wqe_data_seg); qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + @@ -538,7 +422,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } cap->max_send_wr = qp->sq.max_post = - (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; + qp->sq.wqe_cnt - qp->sq_spare_wqes; cap->max_send_sge = min(qp->sq.max_gs, min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)); @@ -977,7 +861,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, { int qpn; int err; - struct ib_qp_cap backup_cap; struct mlx4_ib_sqp *sqp = NULL; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; @@ -1178,9 +1061,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } - memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap)); - err = set_kernel_sq_size(dev, &init_attr->cap, - qp_type, qp, true); + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; @@ -1192,20 +1073,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { - memcpy(&init_attr->cap, &backup_cap, - sizeof(backup_cap)); - err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, - qp, false); - if (err) - goto err_db; - - if (mlx4_buf_alloc(dev->dev, qp->buf_size, - PAGE_SIZE * 2, &qp->buf)) { - err = -ENOMEM; - goto err_db; - } + err = -ENOMEM; + goto err_db; } err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, @@ -1859,8 +1730,7 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah); int real_sgid_index = - mlx4_ib_gid_index_to_real_index(dev, port, - grh->sgid_index); + mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr); if (real_sgid_index < 0) return real_sgid_index; @@ -2176,6 +2046,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, { struct ib_uobject *ibuobject; struct ib_srq *ibsrq; + const struct ib_gid_attr *gid_attr = NULL; struct ib_rwq_ind_table *rwq_ind_tbl; enum ib_qp_type qp_type; struct mlx4_ib_dev *dev; @@ -2356,29 +2227,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (attr_mask & IB_QP_AV) { u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - union ib_gid gid; - struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; - int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; if (is_eth) { - int index = - rdma_ah_read_grh(&attr->ah_attr)->sgid_index; - - status = ib_get_cached_gid(&dev->ib_dev, port_num, - index, &gid, &gid_attr); - if (!status) { - vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); - memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(gid_attr.ndev); - } + gid_attr = attr->ah_attr.grh.sgid_attr; + vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev); + memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN); } - if (status) - goto out; if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, port_num, vlan, smac)) @@ -2389,7 +2248,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (is_eth && (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { - u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); + u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type); if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { err = -EINVAL; @@ -2594,11 +2453,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, for (i = 0; i < qp->sq.wqe_cnt; ++i) { ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); - if (qp->sq_max_wqes_per_wr == 1) - ctrl->qpn_vlan.fence_size = - 1 << (qp->sq.wqe_shift - 4); - - stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); + ctrl->qpn_vlan.fence_size = + 1 << (qp->sq.wqe_shift - 4); + stamp_send_wqe(qp, i); } } @@ -2937,7 +2794,7 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, - struct ib_ud_wr *wr, + const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); @@ -3085,7 +2942,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, } #define MLX4_ROCEV2_QP1_SPORT 0xC000 -static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, +static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; @@ -3181,10 +3038,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; } else { - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + sqp->ud_header.grh.source_gid = + ah->ibah.sgid_attr->gid; } } memcpy(sqp->ud_header.grh.destination_gid.raw, @@ -3369,7 +3224,7 @@ static __be32 convert_access(int acc) } static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { struct mlx4_ib_mr *mr = to_mmr(wr->mr); @@ -3399,7 +3254,7 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, } static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->swap); @@ -3415,7 +3270,7 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { aseg->swap_add = cpu_to_be64(wr->swap); aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); @@ -3424,7 +3279,7 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->remote_qpn); @@ -3435,7 +3290,7 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_ud_wr *wr, + const struct ib_ud_wr *wr, enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->ah)->av; @@ -3457,7 +3312,8 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } -static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) +static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe, + unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; @@ -3540,9 +3396,9 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, - struct mlx4_ib_qp *qp, unsigned *lso_seg_len, - __be32 *lso_hdr_sz, __be32 *blh) +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, + const struct ib_ud_wr *wr, struct mlx4_ib_qp *qp, + unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); @@ -3560,7 +3416,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, return 0; } -static __be32 send_ieth(struct ib_send_wr *wr) +static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: @@ -3582,8 +3438,8 @@ static void add_zero_len_inline(void *wqe) inl->byte_count = cpu_to_be32(1 << 31); } -int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); void *wqe; @@ -3593,7 +3449,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int err = 0; unsigned ind; - int uninitialized_var(stamp); int uninitialized_var(size); unsigned uninitialized_var(seglen); __be32 dummy; @@ -3623,7 +3478,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -3865,22 +3721,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; - stamp = ind + qp->sq_spare_wqes; - ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); - /* * We can improve latency by not stamping the last * send queue WQE until after ringing the doorbell, so * only stamp here if there are still more WQEs to post. - * - * Same optimization applies to padding with NOP wqe - * in case of WQE shrinking (used to prevent wrap-around - * in the middle of WR). */ - if (wr->next) { - stamp_send_wqe(qp, stamp, size * 16); - ind = pad_wraparound(qp, ind); - } + if (wr->next) + stamp_send_wqe(qp, ind + qp->sq_spare_wqes); + ind++; } out: @@ -3902,9 +3750,8 @@ out: */ mmiowb(); - stamp_send_wqe(qp, stamp, size * 16); + stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1); - ind = pad_wraparound(qp, ind); qp->sq_next_wqe = ind; } @@ -3913,8 +3760,14 @@ out: return err; } -int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return _mlx4_ib_post_send(ibqp, wr, bad_wr, false); +} + +static int _mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) { struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_wqe_data_seg *scat; @@ -3929,7 +3782,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR && + !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4000,6 +3854,12 @@ out: return err; } +int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false); +} + static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) { switch (mlx4_state) { @@ -4047,9 +3907,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev, u8 port_num = path->sched_queue & 0x40 ? 2 : 1; memset(ah_attr, 0, sizeof(*ah_attr)); - ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (port_num == 0 || port_num > dev->caps.num_ports) return; + ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) | @@ -4465,3 +4325,132 @@ int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) kfree(ib_rwq_ind_tbl); return 0; } + +struct mlx4_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx4_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx4_ib_drain_cqe *sdrain, + struct mlx4_ib_dev *dev) +{ + struct mlx4_dev *mdev = dev->dev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx4_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. + */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx4_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe sdrain; + const struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx4_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx4_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}; + const struct ib_recv_wr *bad_rwr; + int ret; + struct mlx4_ib_dev *dev = to_mdev(qp->device); + struct mlx4_dev *mdev = dev->dev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx4_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index ebee56cbc0e2..3731b31c3653 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -307,8 +307,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) spin_unlock(&srq->lock); } -int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx4_ib_srq *srq = to_msrq(ibsrq); struct mlx4_wqe_srq_next_seg *next; diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index d42b922bede8..b8e4b15e2674 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,3 +3,5 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o +mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index e6bde32a83f3..ffd03bf1a71e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, struct rdma_ah_attr *ah_attr) { enum ib_gid_type gid_type; - int err; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { - err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, - ah_attr->grh.sgid_index, - &gid_type); - if (err) - return ERR_PTR(err); + gid_type = ah_attr->grh.sgid_attr->gid_type; memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); ah->av.udp_sport = - mlx5_get_roce_udp_sport(dev, - rdma_ah_get_port_num(ah_attr), - rdma_ah_read_grh(ah_attr)->sgid_index); + mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) #define MLX5_ECN_ENABLED BIT(1) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ccc0b5d06a7d..c84fef9a8a08 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -185,3 +185,15 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length) return err; } + +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) +{ + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, + 0, 0); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 98ea4648c655..88cbb1c41703 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -41,6 +41,7 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); +int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 985fa2637390..7e4e358a4fd8 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -359,9 +359,6 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, int ret; char lbuf[11]; - if (*pos) - return 0; - ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var); if (ret) return ret; @@ -370,11 +367,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count, if (ret < 0) return ret; - if (copy_to_user(buf, lbuf, ret)) - return -EFAULT; - - *pos += ret; - return ret; + return simple_read_from_buffer(buf, count, pos, lbuf, ret); } static const struct file_operations dbg_cc_fops = { diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index ad39d64b8108..088205d7f1a1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1184,7 +1184,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int err; if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) - return -ENOSYS; + return -EOPNOTSUPP; if (cq_period > MLX5_MAX_CQ_PERIOD) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c new file mode 100644 index 000000000000..ac116d63e466 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -0,0 +1,1119 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) +struct devx_obj { + struct mlx5_core_dev *mdev; + u32 obj_id; + u32 dinlen; /* destroy inbox length */ + u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; +}; + +struct devx_umem { + struct mlx5_core_dev *mdev; + struct ib_umem *umem; + u32 page_offset; + int page_shift; + int ncont; + u32 dinlen; + u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; +}; + +struct devx_umem_reg_cmd { + void *in; + u32 inlen; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; +}; + +static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +{ + return to_mucontext(ib_uverbs_get_ucontext(file)); +} + +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u64 general_obj_types; + void *hdr; + int err; + + hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); + + general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || + !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + return -EINVAL; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return 0; +} + +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid); + + mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + switch (opcode) { + case MLX5_CMD_OP_DESTROY_TIR: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, + obj_id); + return true; + + case MLX5_CMD_OP_DESTROY_FLOW_TABLE: + *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox, + table_id); + return true; + default: + return false; + } +} + +static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + u32 obj_id; + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_MKEY: + obj_id = MLX5_GET(query_mkey_in, in, mkey_index); + break; + case MLX5_CMD_OP_QUERY_CQ: + obj_id = MLX5_GET(query_cq_in, in, cqn); + break; + case MLX5_CMD_OP_MODIFY_CQ: + obj_id = MLX5_GET(modify_cq_in, in, cqn); + break; + case MLX5_CMD_OP_QUERY_SQ: + obj_id = MLX5_GET(query_sq_in, in, sqn); + break; + case MLX5_CMD_OP_MODIFY_SQ: + obj_id = MLX5_GET(modify_sq_in, in, sqn); + break; + case MLX5_CMD_OP_QUERY_RQ: + obj_id = MLX5_GET(query_rq_in, in, rqn); + break; + case MLX5_CMD_OP_MODIFY_RQ: + obj_id = MLX5_GET(modify_rq_in, in, rqn); + break; + case MLX5_CMD_OP_QUERY_RMP: + obj_id = MLX5_GET(query_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_MODIFY_RMP: + obj_id = MLX5_GET(modify_rmp_in, in, rmpn); + break; + case MLX5_CMD_OP_QUERY_RQT: + obj_id = MLX5_GET(query_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_MODIFY_RQT: + obj_id = MLX5_GET(modify_rqt_in, in, rqtn); + break; + case MLX5_CMD_OP_QUERY_TIR: + obj_id = MLX5_GET(query_tir_in, in, tirn); + break; + case MLX5_CMD_OP_MODIFY_TIR: + obj_id = MLX5_GET(modify_tir_in, in, tirn); + break; + case MLX5_CMD_OP_QUERY_TIS: + obj_id = MLX5_GET(query_tis_in, in, tisn); + break; + case MLX5_CMD_OP_MODIFY_TIS: + obj_id = MLX5_GET(modify_tis_in, in, tisn); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + obj_id = MLX5_GET(query_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + obj_id = MLX5_GET(modify_flow_table_in, in, table_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + obj_id = MLX5_GET(query_flow_group_in, in, group_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(query_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + obj_id = MLX5_GET(set_fte_in, in, flow_index); + break; + case MLX5_CMD_OP_QUERY_Q_COUNTER: + obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id); + break; + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id); + break; + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); + break; + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(query_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + obj_id = MLX5_GET(modify_scheduling_element_in, in, + scheduling_element_id); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + break; + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + break; + case MLX5_CMD_OP_QUERY_QP: + obj_id = MLX5_GET(query_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RST2INIT_QP: + obj_id = MLX5_GET(rst2init_qp_in, in, qpn); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + obj_id = MLX5_GET(init2rtr_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + obj_id = MLX5_GET(rts2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn); + break; + case MLX5_CMD_OP_2ERR_QP: + obj_id = MLX5_GET(qp_2err_in, in, qpn); + break; + case MLX5_CMD_OP_2RST_QP: + obj_id = MLX5_GET(qp_2rst_in, in, qpn); + break; + case MLX5_CMD_OP_QUERY_DCT: + obj_id = MLX5_GET(query_dct_in, in, dctn); + break; + case MLX5_CMD_OP_QUERY_XRQ: + obj_id = MLX5_GET(query_xrq_in, in, xrqn); + break; + case MLX5_CMD_OP_QUERY_XRC_SRQ: + obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_ARM_XRC_SRQ: + obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn); + break; + case MLX5_CMD_OP_QUERY_SRQ: + obj_id = MLX5_GET(query_srq_in, in, srqn); + break; + case MLX5_CMD_OP_ARM_RQ: + obj_id = MLX5_GET(arm_rq_in, in, srq_number); + break; + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + obj_id = MLX5_GET(drain_dct_in, in, dctn); + break; + case MLX5_CMD_OP_ARM_XRQ: + obj_id = MLX5_GET(arm_xrq_in, in, xrqn); + break; + default: + return false; + } + + if (obj_id == obj->obj_id) + return true; + + return false; +} + +static bool devx_is_obj_create_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_CREATE_MKEY: + case MLX5_CMD_OP_CREATE_CQ: + case MLX5_CMD_OP_ALLOC_PD: + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + case MLX5_CMD_OP_CREATE_RMP: + case MLX5_CMD_OP_CREATE_SQ: + case MLX5_CMD_OP_CREATE_RQ: + case MLX5_CMD_OP_CREATE_RQT: + case MLX5_CMD_OP_CREATE_TIR: + case MLX5_CMD_OP_CREATE_TIS: + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_QP: + case MLX5_CMD_OP_CREATE_SRQ: + case MLX5_CMD_OP_CREATE_XRC_SRQ: + case MLX5_CMD_OP_CREATE_DCT: + case MLX5_CMD_OP_CREATE_XRQ: + case MLX5_CMD_OP_ATTACH_TO_MCG: + case MLX5_CMD_OP_ALLOC_XRCD: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + if (op_mod == 0) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_modify_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_CQ: + case MLX5_CMD_OP_MODIFY_RMP: + case MLX5_CMD_OP_MODIFY_SQ: + case MLX5_CMD_OP_MODIFY_RQ: + case MLX5_CMD_OP_MODIFY_RQT: + case MLX5_CMD_OP_MODIFY_TIR: + case MLX5_CMD_OP_MODIFY_TIS: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + case MLX5_CMD_OP_RST2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: + case MLX5_CMD_OP_RTR2RTS_QP: + case MLX5_CMD_OP_RTS2RTS_QP: + case MLX5_CMD_OP_SQERR2RTS_QP: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_ARM_XRC_SRQ: + case MLX5_CMD_OP_ARM_RQ: + case MLX5_CMD_OP_DRAIN_DCT: + case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: + case MLX5_CMD_OP_ARM_XRQ: + return true; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + { + u16 op_mod = MLX5_GET(set_fte_in, in, op_mod); + + if (op_mod == 1) + return true; + return false; + } + default: + return false; + } +} + +static bool devx_is_obj_query_cmd(const void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_MKEY: + case MLX5_CMD_OP_QUERY_CQ: + case MLX5_CMD_OP_QUERY_RMP: + case MLX5_CMD_OP_QUERY_SQ: + case MLX5_CMD_OP_QUERY_RQ: + case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_QUERY_TIR: + case MLX5_CMD_OP_QUERY_TIS: + case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_TABLE: + case MLX5_CMD_OP_QUERY_FLOW_GROUP: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_QP: + case MLX5_CMD_OP_QUERY_SRQ: + case MLX5_CMD_OP_QUERY_XRC_SRQ: + case MLX5_CMD_OP_QUERY_DCT: + case MLX5_CMD_OP_QUERY_XRQ: + return true; + default: + return false; + } +} + +static bool devx_is_general_cmd(void *in) +{ + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + + switch (opcode) { + case MLX5_CMD_OP_QUERY_HCA_CAP: + case MLX5_CMD_OP_QUERY_VPORT_STATE: + case MLX5_CMD_OP_QUERY_ADAPTER: + case MLX5_CMD_OP_QUERY_ISSI: + case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: + case MLX5_CMD_OP_QUERY_VNIC_ENV: + case MLX5_CMD_OP_QUERY_VPORT_COUNTER: + case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: + case MLX5_CMD_OP_NOP: + case MLX5_CMD_OP_QUERY_CONG_STATUS: + case MLX5_CMD_OP_QUERY_CONG_PARAMS: + case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + return true; + default: + return false; + } +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + int user_vector; + int dev_eqn; + unsigned int irqn; + int err; + + if (uverbs_copy_from(&user_vector, attrs, + MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) + return -EFAULT; + + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + if (err < 0) + return err; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + &dev_eqn, sizeof(dev_eqn))) + return -EFAULT; + + return 0; +} + +/* + *Security note: + * The hardware protection mechanism works like this: Each device object that + * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in + * the device specification manual) upon its creation. Then upon doorbell, + * hardware fetches the object context for which the doorbell was rang, and + * validates that the UAR through which the DB was rang matches the UAR ID + * of the object. + * If no match the doorbell is silently ignored by the hardware. Of course, + * the user cannot ring a doorbell on a UAR that was not mapped to it. + * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command + * mailboxes (except tagging them with UID), we expose to the user its UAR + * ID, so it can embed it in these objects in the expected specification + * format. So the only thing the user can do is hurt itself by creating a + * QP/SQ/CQ with a UAR ID other than his, and then in this case other users + * may ring a doorbell on its objects. + * The consequence of that will be that another user can schedule a QP/SQ + * of the buggy user for execution (just insert it to the hardware schedule + * queue or arm its CQ for event generation), no further harm is expected. + */ +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + u32 user_idx; + s32 dev_idx; + + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + if (uverbs_copy_from(&user_idx, attrs, + MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX)) + return -EFAULT; + + dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true); + if (dev_idx < 0) + return dev_idx; + + if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + &dev_idx, sizeof(dev_idx))) + return -EFAULT; + + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_ucontext *c; + struct mlx5_ib_dev *dev; + void *cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); + void *cmd_out; + int err; + + c = devx_ufile2uctx(file); + if (IS_ERR(c)) + return PTR_ERR(c); + dev = to_mdev(c->ibucontext.device); + + if (!c->devx_uid) + return -EPERM; + + /* Only white list of some general HCA commands are allowed for this method. */ + if (!devx_is_general_cmd(cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, + cmd_out_len); +} + +static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, + u32 *dinlen, + u32 *obj_id) +{ + u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); + + switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + break; + + case MLX5_CMD_OP_CREATE_MKEY: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + break; + case MLX5_CMD_OP_CREATE_CQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + break; + case MLX5_CMD_OP_ALLOC_PD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + break; + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + break; + case MLX5_CMD_OP_CREATE_RMP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + break; + case MLX5_CMD_OP_CREATE_SQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + break; + case MLX5_CMD_OP_CREATE_RQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + break; + case MLX5_CMD_OP_CREATE_RQT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + break; + case MLX5_CMD_OP_CREATE_TIR: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + break; + case MLX5_CMD_OP_CREATE_TIS: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + break; + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + break; + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); + *obj_id = MLX5_GET(create_flow_table_out, out, table_id); + MLX5_SET(destroy_flow_table_in, din, other_vport, + MLX5_GET(create_flow_table_in, in, other_vport)); + MLX5_SET(destroy_flow_table_in, din, vport_number, + MLX5_GET(create_flow_table_in, in, vport_number)); + MLX5_SET(destroy_flow_table_in, din, table_type, + MLX5_GET(create_flow_table_in, in, table_type)); + MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_TABLE); + break; + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); + *obj_id = MLX5_GET(create_flow_group_out, out, group_id); + MLX5_SET(destroy_flow_group_in, din, other_vport, + MLX5_GET(create_flow_group_in, in, other_vport)); + MLX5_SET(destroy_flow_group_in, din, vport_number, + MLX5_GET(create_flow_group_in, in, vport_number)); + MLX5_SET(destroy_flow_group_in, din, table_type, + MLX5_GET(create_flow_group_in, in, table_type)); + MLX5_SET(destroy_flow_group_in, din, table_id, + MLX5_GET(create_flow_group_in, in, table_id)); + MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_FLOW_GROUP); + break; + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); + *obj_id = MLX5_GET(set_fte_in, in, flow_index); + MLX5_SET(delete_fte_in, din, other_vport, + MLX5_GET(set_fte_in, in, other_vport)); + MLX5_SET(delete_fte_in, din, vport_number, + MLX5_GET(set_fte_in, in, vport_number)); + MLX5_SET(delete_fte_in, din, table_type, + MLX5_GET(set_fte_in, in, table_type)); + MLX5_SET(delete_fte_in, din, table_id, + MLX5_GET(set_fte_in, in, table_id)); + MLX5_SET(delete_fte_in, din, flow_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); + break; + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + break; + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + break; + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + break; + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); + *obj_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_hierarchy, + MLX5_GET(create_scheduling_element_in, in, + scheduling_hierarchy)); + MLX5_SET(destroy_scheduling_element_in, din, + scheduling_element_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + break; + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); + *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); + break; + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); + *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); + MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + break; + case MLX5_CMD_OP_CREATE_QP: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + break; + case MLX5_CMD_OP_CREATE_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + break; + case MLX5_CMD_OP_CREATE_XRC_SRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + break; + case MLX5_CMD_OP_CREATE_DCT: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + break; + case MLX5_CMD_OP_CREATE_XRQ: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + break; + case MLX5_CMD_OP_ATTACH_TO_MCG: + *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); + MLX5_SET(detach_from_mcg_in, din, qpn, + MLX5_GET(attach_to_mcg_in, in, qpn)); + memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), + MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), + MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + break; + case MLX5_CMD_OP_ALLOC_XRCD: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + break; + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + break; + } +} + +static int devx_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + struct devx_obj *obj = uobject->object; + int ret; + + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(ret, why, uobject)) + return ret; + + kfree(obj); + return ret; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT); + void *cmd_out; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + struct devx_obj *obj; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_create_cmd(cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), + cmd_out, cmd_out_len); + if (err) + goto obj_free; + + uobj->object = obj; + obj->mdev = dev->mdev; + devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); + WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); + if (err) + goto obj_free; + + return 0; + +obj_free: + kfree(obj); + return err; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct devx_obj *obj = uobj->object; + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_modify_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(obj, cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(obj->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + cmd_out, cmd_out_len); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); + int cmd_out_len = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT); + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, + MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct devx_obj *obj = uobj->object; + void *cmd_out; + int err; + + if (!c->devx_uid) + return -EPERM; + + if (!devx_is_obj_query_cmd(cmd_in)) + return -EINVAL; + + if (!devx_is_valid_obj_id(obj, cmd_in)) + return -EINVAL; + + cmd_out = uverbs_zalloc(attrs, cmd_out_len); + if (IS_ERR(cmd_out)) + return PTR_ERR(cmd_out); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); + err = mlx5_cmd_exec(obj->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), + cmd_out, cmd_out_len); + if (err) + return err; + + return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + cmd_out, cmd_out_len); +} + +static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, + struct uverbs_attr_bundle *attrs, + struct devx_umem *obj) +{ + u64 addr; + size_t size; + u32 access; + int npages; + int err; + u32 page_mask; + + if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || + uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) + return -EFAULT; + + err = uverbs_get_flags32(&access, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + IB_ACCESS_SUPPORTED); + if (err) + return err; + + err = ib_check_mr_access(access); + if (err) + return err; + + obj->umem = ib_umem_get(ucontext, addr, size, access, 0); + if (IS_ERR(obj->umem)) + return PTR_ERR(obj->umem); + + mlx5_ib_cont_pages(obj->umem, obj->umem->address, + MLX5_MKEY_PAGE_SHIFT_MASK, &npages, + &obj->page_shift, &obj->ncont, NULL); + + if (!npages) { + ib_umem_release(obj->umem); + return -EINVAL; + } + + page_mask = (1 << obj->page_shift) - 1; + obj->page_offset = obj->umem->address & page_mask; + + return 0; +} + +static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) + + (MLX5_ST_SZ_BYTES(mtt) * obj->ncont); + cmd->in = uverbs_zalloc(attrs, cmd->inlen); + return PTR_ERR_OR_ZERO(cmd->in); +} + +static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, + struct devx_umem *obj, + struct devx_umem_reg_cmd *cmd) +{ + void *umem; + __be64 *mtt; + + umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); + mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); + + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); + MLX5_SET(umem, umem, log_page_size, obj->page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(umem, umem, page_offset, obj->page_offset); + mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt, + (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | + MLX5_IB_MTT_READ); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct devx_umem_reg_cmd cmd; + struct devx_umem *obj; + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); + u32 obj_id; + struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); + struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + int err; + + if (!c->devx_uid) + return -EPERM; + + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + if (err) + goto err_obj_free; + + err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd); + if (err) + goto err_umem_release; + + devx_umem_reg_cmd_build(dev, obj, &cmd); + + MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, + sizeof(cmd.out)); + if (err) + goto err_umem_release; + + obj->mdev = dev->mdev; + uobj->object = obj; + devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id); + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id)); + if (err) + goto err_umem_destroy; + + return 0; + +err_umem_destroy: + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); +err_umem_release: + ib_umem_release(obj->umem); +err_obj_free: + kfree(obj); + return err; +} + +static int devx_umem_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct devx_umem *obj = uobject->object; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + int err; + + err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (ib_is_destroy_retryable(err, why, uobject)) + return err; + + ib_umem_release(obj->umem); + kfree(obj); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_UMEM_REG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + enum ib_access_flags), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_UMEM_DEREG, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE, + MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_EQN, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_QUERY_UAR, + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OTHER, + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OTHER_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DEVX_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_MODIFY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_WRITE, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_DEVX_OBJ_QUERY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_PTR_OUT( + MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)), + UA_MANDATORY)); + +DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX, + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, + UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), + &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); + +DECLARE_UVERBS_OBJECT_TREE(devx_objects, + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), + &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); + +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +{ + return &devx_objects; +} diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c new file mode 100644 index 000000000000..1a29f47f836e --- /dev/null +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + */ + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_verbs.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include "mlx5_ib.h" + +#define UVERBS_MODULE_NAME mlx5_ib +#include <rdma/uverbs_named_ioctl.h> + +static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { + [MLX5_IB_FLOW_TYPE_NORMAL] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + .u.ptr = { + .len = sizeof(u16), /* data is priority */ + .min_len = sizeof(u16), + } + }, + [MLX5_IB_FLOW_TYPE_SNIFFER] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, + [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = { + .type = UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_NO_DATA(), + }, +}; + +static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_flow_handler *flow_handler; + struct mlx5_ib_flow_matcher *fs_matcher; + void *devx_obj; + int dest_id, dest_type; + void *cmd_in; + int inlen; + bool dest_devx, dest_qp; + struct ib_qp *qp = NULL; + struct ib_uobject *uobj = + uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + dest_devx = + uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + dest_qp = uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + + if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)) + return -EINVAL; + + if (dest_devx) { + devx_obj = uverbs_attr_get_obj( + attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); + if (IS_ERR(devx_obj)) + return PTR_ERR(devx_obj); + + /* Verify that the given DEVX object is a flow + * steering destination. + */ + if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) + return -EINVAL; + } else { + struct mlx5_ib_qp *mqp; + + qp = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + if (qp->qp_type != IB_QPT_RAW_PACKET) + return -EINVAL; + + mqp = to_mqp(qp); + if (mqp->flags & MLX5_IB_QP_RSS) + dest_id = mqp->rss_qp.tirn; + else + dest_id = mqp->raw_packet_qp.rq.tirn; + dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; + } + + if (dev->rep) + return -ENOTSUPP; + + cmd_in = uverbs_attr_get_alloced_ptr( + attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + inlen = uverbs_attr_get_len(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); + fs_matcher = uverbs_attr_get_obj(attrs, + MLX5_IB_ATTR_CREATE_FLOW_MATCHER); + flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen, + dest_id, dest_type); + if (IS_ERR(flow_handler)) + return PTR_ERR(flow_handler); + + ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev); + + return 0; +} + +static int flow_matcher_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct mlx5_ib_flow_matcher *obj = uobject->object; + int ret; + + ret = ib_destroy_usecnt(&obj->usecnt, why, uobject); + if (ret) + return ret; + + kfree(obj); + return 0; +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( + struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + struct mlx5_ib_flow_matcher *obj; + int err; + + obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->mask_len = uverbs_attr_get_len( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + err = uverbs_copy_from(&obj->matcher_mask, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); + if (err) + goto end; + + obj->flow_type = uverbs_attr_get_enum_id( + attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + + if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) { + err = uverbs_copy_from(&obj->priority, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE); + if (err) + goto end; + } + + err = uverbs_copy_from(&obj->match_criteria_enable, + attrs, + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA); + if (err) + goto end; + + uobj->object = obj; + obj->mdev = dev->mdev; + atomic_set(&obj->usecnt, 0); + return 0; + +end: + kfree(obj); + return err; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_CREATE_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY, + UA_ALLOC_AND_COPY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, + UVERBS_OBJECT_QP, + UVERBS_ACCESS_READ), + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_DESTROY_FLOW, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, + UVERBS_OBJECT_FLOW, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +ADD_UVERBS_METHODS(mlx5_ib_fs, + UVERBS_OBJECT_FLOW, + &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW), + &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_FLOW_MATCHER_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN( + MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, + UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)), + UA_MANDATORY), + UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, + mlx5_ib_flow_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, + UVERBS_ATTR_TYPE(u8), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE, + MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, + UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); + +DECLARE_UVERBS_OBJECT_TREE(flow_objects, + &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); + +int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +{ + int i = 0; + + root[i++] = &flow_objects; + root[i++] = &mlx5_ib_fs; + + return i; +} diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 79e6309460dc..4950df3f71b6 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -477,8 +477,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) return gsi->tx_qps[qp_index]; } -int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); struct ib_qp *tx_qp; @@ -522,8 +522,8 @@ err: return ret; } -int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b3ba9a222550..c414f3809e5c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size); @@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, vlan_id, port_num); } -static int mlx5_ib_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, +static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, __always_unused void **context) { return set_roce_addr(to_mdev(attr->device), attr->port_num, - attr->index, gid, attr); + attr->index, &attr->gid, attr); } static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, @@ -525,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, attr->index, NULL, NULL); } -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index) +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr) { - struct ib_gid_attr attr; - union ib_gid gid; - - if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) - return 0; - - dev_put(attr.ndev); - - if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) + if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 0; return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); } -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type) -{ - struct ib_gid_attr attr; - union ib_gid gid; - int ret; - - ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr); - if (ret) - return ret; - - dev_put(attr.ndev); - - *gid_type = attr.gid_type; - - return 0; -} - static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) @@ -915,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); - props->max_sge = min(max_rq_sg, max_sq_sg); + props->max_send_sge = max_sq_sg; + props->max_recv_sge = max_rq_sg; props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; @@ -1246,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->qkey_viol_cntr = rep->qkey_violation_counter; props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; - props->grh_required = rep->grh_required; err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) @@ -1585,31 +1558,26 @@ error: return err; } -static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) +static void deallocate_uars(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { struct mlx5_bfreg_info *bfregi; - int err; int i; bfregi = &context->bfregi; - for (i = 0; i < bfregi->num_sys_pages; i++) { + for (i = 0; i < bfregi->num_sys_pages; i++) if (i < bfregi->num_static_sys_pages || - bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) { - err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); - if (err) { - mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err); - return err; - } - } - } - - return 0; + bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) + mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); } static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) { int err; + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return 0; + err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); if (err) return err; @@ -1631,6 +1599,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) { + if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) + return; + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || @@ -1660,6 +1631,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, int err; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); + u32 dump_fill_mkey; bool lib_uar_4k; if (!dev->ib_active) @@ -1676,8 +1648,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) return ERR_PTR(err); - if (req.flags) - return ERR_PTR(-EINVAL); + if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX) + return ERR_PTR(-EOPNOTSUPP); if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) return ERR_PTR(-EOPNOTSUPP); @@ -1755,10 +1727,26 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; #endif - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); + if (err) + goto out_uars; + + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { + /* Block DEVX on Infiniband as of SELinux */ + if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) { + err = -EPERM; + goto out_td; + } + + err = mlx5_ib_devx_create(dev, context); if (err) - goto out_uars; + goto out_td; + } + + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); + if (err) + goto out_mdev; } INIT_LIST_HEAD(&context->vma_private_list); @@ -1819,9 +1807,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.num_dyn_bfregs); } + if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) { + if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { + resp.dump_fill_mkey = dump_fill_mkey; + resp.comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; + } + resp.response_length += sizeof(resp.dump_fill_mkey); + } + err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) - goto out_td; + goto out_mdev; bfregi->ver = ver; bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; @@ -1831,9 +1828,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, return &context->ibucontext; +out_mdev: + if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) + mlx5_ib_devx_destroy(dev, context); out_td: - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); out_uars: deallocate_uars(dev, context); @@ -1856,9 +1855,11 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; + if (context->devx_uid) + mlx5_ib_devx_destroy(dev, context); + bfregi = &context->bfregi; - if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_ib_dealloc_transport_domain(dev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); deallocate_uars(dev, context); kfree(bfregi->sys_pages); @@ -2040,7 +2041,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, struct mlx5_bfreg_info *bfregi = &context->bfregi; int err; unsigned long idx; - phys_addr_t pfn, pa; + phys_addr_t pfn; pgprot_t prot; u32 bfreg_dyn_idx = 0; u32 uar_index; @@ -2131,8 +2132,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, goto err; } - pa = pfn << PAGE_SHIFT; - err = mlx5_ib_set_vma_data(vma, context); if (err) goto err; @@ -2699,7 +2698,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, IPPROTO_GRE); MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, - 0xffff); + ntohs(ib_spec->gre.mask.protocol)); MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, ntohs(ib_spec->gre.val.protocol)); @@ -2979,11 +2978,11 @@ static void counters_clear_description(struct ib_counters *counters) static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { - struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); struct mlx5_ib_flow_handler *handler = container_of(flow_id, struct mlx5_ib_flow_handler, ibflow); struct mlx5_ib_flow_handler *iter, *tmp; + struct mlx5_ib_dev *dev = handler->dev; mutex_lock(&dev->flow_db->lock); @@ -3001,6 +3000,8 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) counters_clear_description(handler->ibcounters); mutex_unlock(&dev->flow_db->lock); + if (handler->flow_matcher) + atomic_dec(&handler->flow_matcher->usecnt); kfree(handler); return 0; @@ -3021,6 +3022,26 @@ enum flow_table_type { #define MLX5_FS_MAX_TYPES 6 #define MLX5_FS_MAX_ENTRIES BIT(16) + +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, + struct mlx5_ib_flow_prio *prio, + int priority, + int num_entries, int num_groups) +{ + struct mlx5_flow_table *ft; + + ft = mlx5_create_auto_grouped_flow_table(ns, priority, + num_entries, + num_groups, + 0, 0); + if (IS_ERR(ft)) + return ERR_CAST(ft); + + prio->flow_table = ft; + prio->refcount = 0; + return prio; +} + static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, struct ib_flow_attr *flow_attr, enum flow_table_type ft_type) @@ -3033,7 +3054,6 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, int num_entries; int num_groups; int priority; - int err = 0; max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); @@ -3083,21 +3103,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return ERR_PTR(-ENOMEM); ft = prio->flow_table; - if (!ft) { - ft = mlx5_create_auto_grouped_flow_table(ns, priority, - num_entries, - num_groups, - 0, 0); - - if (!IS_ERR(ft)) { - prio->refcount = 0; - prio->flow_table = ft; - } else { - err = PTR_ERR(ft); - } - } + if (!ft) + return _get_prio(ns, prio, priority, num_entries, num_groups); - return err ? ERR_PTR(err) : prio; + return prio; } static void set_underlay_qp(struct mlx5_ib_dev *dev, @@ -3356,6 +3365,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ft_prio->refcount++; handler->prio = ft_prio; + handler->dev = dev; ft_prio->flow_table = ft; free: @@ -3648,6 +3658,189 @@ free_ucmd: return ERR_PTR(err); } +static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev, + int priority, bool mcast) +{ + int max_table_size; + struct mlx5_flow_namespace *ns = NULL; + struct mlx5_ib_flow_prio *prio; + + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + log_max_ft_size)); + if (max_table_size < MLX5_FS_MAX_ENTRIES) + return ERR_PTR(-ENOMEM); + + if (mcast) + priority = MLX5_IB_FLOW_MCAST_PRIO; + else + priority = ib_prio_to_core_prio(priority, false); + + ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) + return ERR_PTR(-ENOTSUPP); + + prio = &dev->flow_db->prios[priority]; + + if (prio->flow_table) + return prio; + + return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, + MLX5_FS_MAX_TYPES); +} + +static struct mlx5_ib_flow_handler * +_create_raw_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + struct mlx5_flow_destination *dst, + struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen) +{ + struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft = ft_prio->flow_table; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + handler = kzalloc(sizeof(*handler), GFP_KERNEL); + if (!handler || !spec) { + err = -ENOMEM; + goto free; + } + + INIT_LIST_HEAD(&handler->list); + + memcpy(spec->match_value, cmd_in, inlen); + memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, + fs_matcher->mask_len); + spec->match_criteria_enable = fs_matcher->match_criteria_enable; + + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + handler->rule = mlx5_add_flow_rules(ft, spec, + &flow_act, dst, 1); + + if (IS_ERR(handler->rule)) { + err = PTR_ERR(handler->rule); + goto free; + } + + ft_prio->refcount++; + handler->prio = ft_prio; + handler->dev = dev; + ft_prio->flow_table = ft; + +free: + if (err) + kfree(handler); + kvfree(spec); + return err ? ERR_PTR(err) : handler; +} + +static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, + void *match_v) +{ + void *match_c; + void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; + void *dmac, *dmac_mask; + void *ipv4, *ipv4_mask; + + if (!(fs_matcher->match_criteria_enable & + (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) + return false; + + match_c = fs_matcher->matcher_mask.match_params; + match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + + dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dmac_47_16); + dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dmac_47_16); + + if (is_multicast_ether_addr(dmac) && + is_multicast_ether_addr(dmac_mask)) + return true; + + ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + if (ipv4_is_multicast(*(__be32 *)(ipv4)) && + ipv4_is_multicast(*(__be32 *)(ipv4_mask))) + return true; + + return false; +} + +struct mlx5_ib_flow_handler * +mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen, int dest_id, + int dest_type) +{ + struct mlx5_flow_destination *dst; + struct mlx5_ib_flow_prio *ft_prio; + int priority = fs_matcher->priority; + struct mlx5_ib_flow_handler *handler; + bool mcast; + int err; + + if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) + return ERR_PTR(-EOPNOTSUPP); + + if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) + return ERR_PTR(-ENOMEM); + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + mcast = raw_fs_is_multicast(fs_matcher, cmd_in); + mutex_lock(&dev->flow_db->lock); + + ft_prio = _get_flow_table(dev, priority, mcast); + if (IS_ERR(ft_prio)) { + err = PTR_ERR(ft_prio); + goto unlock; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + dst->type = dest_type; + dst->tir_num = dest_id; + } else { + dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst->ft_num = dest_id; + } + + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, + inlen); + + if (IS_ERR(handler)) { + err = PTR_ERR(handler); + goto destroy_ft; + } + + mutex_unlock(&dev->flow_db->lock); + atomic_inc(&fs_matcher->usecnt); + handler->flow_matcher = fs_matcher; + + kfree(dst); + + return handler; + +destroy_ft: + put_flow_table(dev, ft_prio, false); +unlock: + mutex_unlock(&dev->flow_db->lock); + kfree(dst); + + return ERR_PTR(err); +} + static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) { u32 flags = 0; @@ -3672,12 +3865,11 @@ mlx5_ib_create_flow_action_esp(struct ib_device *device, u64 flags; int err = 0; - if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs, - MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS))) - return ERR_PTR(-EFAULT); - - if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1)) - return ERR_PTR(-EOPNOTSUPP); + err = uverbs_get_flags64( + &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); + if (err) + return ERR_PTR(err); flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); @@ -4466,7 +4658,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr) cancel_work_sync(&devr->ports[port].pkey_change_work); } -static u32 get_core_cap_flags(struct ib_device *ibdev) +static u32 get_core_cap_flags(struct ib_device *ibdev, + struct mlx5_hca_vport_context *rep) { struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); @@ -4475,11 +4668,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev) bool raw_support = !mlx5_core_mp_enabled(dev->mdev); u32 ret = 0; + if (rep->grh_required) + ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED; + if (ll == IB_LINK_LAYER_INFINIBAND) - return RDMA_CORE_PORT_IBA_IB; + return ret | RDMA_CORE_PORT_IBA_IB; if (raw_support) - ret = RDMA_CORE_PORT_RAW_PACKET; + ret |= RDMA_CORE_PORT_RAW_PACKET; if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) return ret; @@ -4502,17 +4698,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_attr attr; struct mlx5_ib_dev *dev = to_mdev(ibdev); enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); + struct mlx5_hca_vport_context rep = {0}; int err; - immutable->core_cap_flags = get_core_cap_flags(ibdev); - err = ib_query_port(ibdev, port_num, &attr); if (err) return err; + if (ll == IB_LINK_LAYER_INFINIBAND) { + err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0, + &rep); + if (err) + return err; + } + immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = get_core_cap_flags(ibdev); + immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep); if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) immutable->max_mad_size = IB_MGMT_MAD_SIZE; @@ -4610,7 +4812,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) } } -static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; @@ -4689,12 +4891,21 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(req_cqe_flush_error), }; +#define INIT_EXT_PPCNT_COUNTER(_name) \ + { .name = #_name, .offset = \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout._name##_high)} + +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { + INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { int i; for (i = 0; i < dev->num_ports; i++) { - if (dev->port[i].cnts.set_id) + if (dev->port[i].cnts.set_id_valid) mlx5_core_dealloc_q_counter(dev->mdev, dev->port[i].cnts.set_id); kfree(dev->port[i].cnts.names); @@ -4724,7 +4935,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); num_counters += ARRAY_SIZE(cong_cnts); } - + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); + num_counters += ARRAY_SIZE(ext_ppcnt_cnts); + } cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); if (!cnts->names) return -ENOMEM; @@ -4781,6 +4995,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, offsets[j] = cong_cnts[i].offset; } } + + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { + names[j] = ext_ppcnt_cnts[i].name; + offsets[j] = ext_ppcnt_cnts[i].offset; + } + } } static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) @@ -4826,7 +5047,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, return rdma_alloc_hw_stats_struct(port->cnts.names, port->cnts.num_q_counters + - port->cnts.num_cong_counters, + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -4859,6 +5081,34 @@ free: return ret; } +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, + struct mlx5_ib_port *port, + struct rdma_hw_stats *stats) +{ + int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int ret, i; + void *out; + + out = kvzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out); + if (ret) + goto free; + + for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) { + stats->value[i + offset] = + be64_to_cpup((__be64 *)(out + + port->cnts.offsets[i + offset])); + } + +free: + kvfree(out); + return ret; +} + static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) @@ -4872,13 +5122,21 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, if (!stats) return -EINVAL; - num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters; + num_counters = port->cnts.num_q_counters + + port->cnts.num_cong_counters + + port->cnts.num_ext_ppcnt_counters; /* q_counters are per IB device, query the master mdev */ ret = mlx5_ib_query_q_counters(dev->mdev, port, stats); if (ret) return ret; + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { + ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats); + if (ret) + return ret; + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); @@ -4905,11 +5163,6 @@ done: return num_counters; } -static void mlx5_ib_free_rdma_netdev(struct net_device *netdev) -{ - return mlx5_rdma_netdev_free(netdev); -} - static struct net_device* mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, u8 port_num, @@ -4919,17 +5172,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, void (*setup)(struct net_device *)) { struct net_device *netdev; - struct rdma_netdev *rn; if (type != RDMA_NETDEV_IPOIB) return ERR_PTR(-EOPNOTSUPP); netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, name, setup); - if (likely(!IS_ERR_OR_NULL(netdev))) { - rn = netdev_priv(netdev); - rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev; - } return netdev; } @@ -5127,8 +5375,8 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, spin_lock(&ibdev->port[port_num].mp.mpi_lock); if (ibdev->port[port_num].mp.mpi) { - mlx5_ib_warn(ibdev, "port %d already affiliated.\n", - port_num + 1); + mlx5_ib_dbg(ibdev, "port %d already affiliated.\n", + port_num + 1); spin_unlock(&ibdev->port[port_num].mp.mpi_lock); return false; } @@ -5263,45 +5511,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, - UVERBS_METHOD_DM_ALLOC, - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), - &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, - UVERBS_ATTR_TYPE(u16), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); - -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - UVERBS_ATTR_TYPE(u64), - UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_dm, + UVERBS_OBJECT_DM, + UVERBS_METHOD_DM_ALLOC, + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY)); + +ADD_UVERBS_ATTRIBUTES_SIMPLE( + mlx5_ib_flow_action, + UVERBS_OBJECT_FLOW_ACTION, + UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, + UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, + enum mlx5_ib_uapi_flow_action_flags)); -#define NUM_TREES 2 static int populate_specs_root(struct mlx5_ib_dev *dev) { - const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { - uverbs_default_get_objects()}; - size_t num_trees = 1; + const struct uverbs_object_tree_def **trees = dev->driver_trees; + size_t num_trees = 0; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && - !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) - default_root[num_trees++] = &mlx5_ib_flow_action; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + trees[num_trees++] = &mlx5_ib_flow_action; - if (MLX5_CAP_DEV_MEM(dev->mdev, memic) && - !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) - default_root[num_trees++] = &mlx5_ib_dm; + if (MLX5_CAP_DEV_MEM(dev->mdev, memic)) + trees[num_trees++] = &mlx5_ib_dm; - dev->ib_dev.specs_root = - uverbs_alloc_spec_tree(num_trees, default_root); + if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) + trees[num_trees++] = mlx5_ib_get_devx_tree(); - return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); -} + num_trees += mlx5_ib_get_flow_trees(trees + num_trees); -static void depopulate_specs_root(struct mlx5_ib_dev *dev) -{ - uverbs_free_spec_tree(dev->ib_dev.specs_root); + WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees)); + trees[num_trees] = NULL; + dev->ib_dev.driver_specs = trees; + + return 0; } static int mlx5_ib_read_counters(struct ib_counters *counters, @@ -5552,6 +5802,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.modify_qp = mlx5_ib_modify_qp; dev->ib_dev.query_qp = mlx5_ib_query_qp; dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; + dev->ib_dev.drain_sq = mlx5_ib_drain_sq; + dev->ib_dev.drain_rq = mlx5_ib_drain_rq; dev->ib_dev.post_send = mlx5_ib_post_send; dev->ib_dev.post_recv = mlx5_ib_post_recv; dev->ib_dev.create_cq = mlx5_ib_create_cq; @@ -5649,9 +5901,9 @@ int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) return 0; } -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, - u8 port_num) +static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { + u8 port_num; int i; for (i = 0; i < dev->num_ports; i++) { @@ -5674,6 +5926,8 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + return mlx5_add_netdev_notifier(dev, port_num); } @@ -5690,14 +5944,12 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) enum rdma_link_layer ll; int port_type_cap; int err = 0; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) - err = mlx5_ib_stage_common_roce_init(dev, port_num); + err = mlx5_ib_stage_common_roce_init(dev); return err; } @@ -5712,19 +5964,17 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; int err; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_ib_stage_common_roce_init(dev, port_num); + err = mlx5_ib_stage_common_roce_init(dev); if (err) return err; - err = mlx5_enable_eth(dev, port_num); + err = mlx5_enable_eth(dev); if (err) goto cleanup; } @@ -5741,9 +5991,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num; - port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -5842,11 +6090,6 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) return ib_register_device(&dev->ib_dev, NULL); } -static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev) -{ - depopulate_specs_root(dev); -} - void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); @@ -5915,8 +6158,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device((struct ib_device *)dev); } -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); - void *__mlx5_ib_add(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile) { @@ -5983,7 +6224,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, - mlx5_ib_stage_depopulate_specs), + NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6031,7 +6272,7 @@ static const struct mlx5_ib_profile nic_rep_profile = { mlx5_ib_stage_pre_ib_reg_umr_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SPECS, mlx5_ib_stage_populate_specs, - mlx5_ib_stage_depopulate_specs), + NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6046,7 +6287,7 @@ static const struct mlx5_ib_profile nic_rep_profile = { mlx5_ib_stage_rep_reg_cleanup), }; -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) { struct mlx5_ib_multiport_info *mpi; struct mlx5_ib_dev *dev; @@ -6080,8 +6321,6 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) if (!bound) { list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); - } else { - mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1); } mutex_unlock(&mlx5_ib_multiport_mutex); @@ -6099,11 +6338,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { - u8 port_num = mlx5_core_native_port_num(mdev) - 1; - - return mlx5_ib_add_slave_port(mdev, port_num); - } + if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) + return mlx5_ib_add_slave_port(mdev); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); if (!dev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d89c8fe626f6..320d4dfe8c2f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -46,6 +46,7 @@ #include <rdma/ib_user_verbs.h> #include <rdma/mlx5-abi.h> #include <rdma/uverbs_ioctl.h> +#include <rdma/mlx5_user_ioctl_cmds.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -78,12 +79,6 @@ enum { MLX5_REQ_SCAT_DATA64_CQE = 0x22, }; -enum mlx5_ib_latency_class { - MLX5_IB_LATENCY_CLASS_LOW, - MLX5_IB_LATENCY_CLASS_MEDIUM, - MLX5_IB_LATENCY_CLASS_HIGH, -}; - enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_IGNORE_MKEY = 1, MLX5_MAD_IFC_IGNORE_BKEY = 2, @@ -143,6 +138,7 @@ struct mlx5_ib_ucontext { u64 lib_caps; DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); + u16 devx_uid; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -176,6 +172,18 @@ struct mlx5_ib_flow_handler { struct mlx5_ib_flow_prio *prio; struct mlx5_flow_handle *rule; struct ib_counters *ibcounters; + struct mlx5_ib_dev *dev; + struct mlx5_ib_flow_matcher *flow_matcher; +}; + +struct mlx5_ib_flow_matcher { + struct mlx5_ib_match_params matcher_mask; + int mask_len; + enum mlx5_ib_flow_type flow_type; + u16 priority; + struct mlx5_core_dev *mdev; + atomic_t usecnt; + u8 match_criteria_enable; }; struct mlx5_ib_flow_db { @@ -461,7 +469,7 @@ struct mlx5_umr_wr { u32 mkey; }; -static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) +static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr) { return container_of(wr, struct mlx5_umr_wr, wr); } @@ -665,6 +673,7 @@ struct mlx5_ib_counters { size_t *offsets; u32 num_q_counters; u32 num_cong_counters; + u32 num_ext_ppcnt_counters; u16 set_id; bool set_id_valid; }; @@ -851,6 +860,7 @@ to_mcounters(struct ib_counters *ibcntrs) struct mlx5_ib_dev { struct ib_device ib_dev; + const struct uverbs_object_tree_def *driver_trees[6]; struct mlx5_core_dev *mdev; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; @@ -1004,8 +1014,8 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); int mlx5_ib_destroy_srq(struct ib_srq *srq); -int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1014,10 +1024,12 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_destroy_qp(struct ib_qp *qp); -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +void mlx5_ib_drain_sq(struct ib_qp *qp); +void mlx5_ib_drain_rq(struct ib_qp *qp); +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, @@ -1183,10 +1195,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, - int index); -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, - int index, enum ib_gid_type *gid_type); +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); @@ -1200,10 +1210,10 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); -int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); @@ -1217,6 +1227,36 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context); +const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( + struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, + void *cmd_in, int inlen, int dest_id, int dest_type); +bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); +#else +static inline int +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; }; +static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, + struct mlx5_ib_ucontext *context) {} +static inline const struct uverbs_object_tree_def * +mlx5_ib_get_devx_tree(void) { return NULL; } +static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, + int *dest_type) +{ + return false; +} +static inline int +mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +{ + return 0; +} +#endif static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -1318,4 +1358,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev, unsigned long mlx5_ib_get_xlt_emergency_page(void); void mlx5_ib_put_xlt_emergency_page(void); +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 90a9c461cedc..9fb1d9cb9401 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -271,16 +271,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -310,19 +310,11 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations size_fops = { @@ -337,16 +329,16 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, { struct mlx5_cache_ent *ent = filp->private_data; struct mlx5_ib_dev *dev = ent->dev; - char lbuf[20]; + char lbuf[20] = {0}; u32 var; int err; int c; - if (copy_from_user(lbuf, buf, sizeof(lbuf))) + count = min(count, sizeof(lbuf) - 1); + if (copy_from_user(lbuf, buf, count)) return -EFAULT; c = order2idx(dev, ent->order); - lbuf[sizeof(lbuf) - 1] = 0; if (sscanf(lbuf, "%u", &var) != 1) return -EINVAL; @@ -372,19 +364,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - if (*pos) - return 0; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); if (err < 0) return err; - if (copy_to_user(buf, lbuf, err)) - return -EFAULT; - - *pos += err; - - return err; + return simple_read_from_buffer(buf, count, pos, lbuf, err); } static const struct file_operations limit_fops = { @@ -914,7 +898,7 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, struct mlx5_umr_wr *umrwr) { struct umr_common *umrc = &dev->umrc; - struct ib_send_wr *bad; + const struct ib_send_wr *bad; int err; struct mlx5_ib_umr_context umr_context; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a4f1f638509f..6cba2a02d11b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, } static int alloc_bfreg(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, - enum mlx5_ib_latency_class lat) + struct mlx5_bfreg_info *bfregi) { - int bfregn = -EINVAL; + int bfregn = -ENOMEM; mutex_lock(&bfregi->lock); - switch (lat) { - case MLX5_IB_LATENCY_CLASS_LOW: + if (bfregi->ver >= 2) { + bfregn = alloc_high_class_bfreg(dev, bfregi); + if (bfregn < 0) + bfregn = alloc_med_class_bfreg(dev, bfregi); + } + + if (bfregn < 0) { BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1); bfregn = 0; bfregi->count[bfregn]++; - break; - - case MLX5_IB_LATENCY_CLASS_MEDIUM: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_med_class_bfreg(dev, bfregi); - break; - - case MLX5_IB_LATENCY_CLASS_HIGH: - if (bfregi->ver < 2) - bfregn = -ENOMEM; - else - bfregn = alloc_high_class_bfreg(dev, bfregi); - break; } mutex_unlock(&bfregi->lock); @@ -641,13 +630,13 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq); -static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi, int bfregn, - bool dyn_bfreg) +int bfregn_to_uar_index(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi, u32 bfregn, + bool dyn_bfreg) { - int bfregs_per_sys_page; - int index_of_sys_page; - int offset; + unsigned int bfregs_per_sys_page; + u32 index_of_sys_page; + u32 offset; bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * MLX5_NON_FP_BFREGS_PER_UAR; @@ -655,6 +644,10 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, if (dyn_bfreg) { index_of_sys_page += bfregi->num_static_sys_pages; + + if (index_of_sys_page >= bfregi->num_sys_pages) + return -EINVAL; + if (bfregn > bfregi->num_dyn_bfregs || bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) { mlx5_ib_dbg(dev, "Invalid dynamic uar index\n"); @@ -819,21 +812,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, bfregn = MLX5_CROSS_CHANNEL_BFREG; } else { - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n"); - mlx5_ib_dbg(dev, "reverting to medium latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM); - if (bfregn < 0) { - mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW); - if (bfregn < 0) { - mlx5_ib_warn(dev, "bfreg allocation failed\n"); - return bfregn; - } - } - } + bfregn = alloc_bfreg(dev, &context->bfregi); + if (bfregn < 0) + return bfregn; } mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); @@ -1626,7 +1607,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_create_qp_resp resp; + struct mlx5_ib_create_qp_resp resp = {}; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; @@ -2555,18 +2536,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(ah_flags & IB_AH_GRH)) return -EINVAL; - err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index, - &gid_type); - if (err) - return err; + memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_XRC_INI || qp->ibqp.qp_type == IB_QPT_XRC_TGT) - path->udp_sport = mlx5_get_roce_udp_sport(dev, port, - grh->sgid_index); + path->udp_sport = + mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr); path->dci_cfi_prio_sl = (sl & 0x7) << 4; + gid_type = ah->grh.sgid_attr->gid_type; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f; } else { @@ -3529,7 +3508,7 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, } static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, - struct ib_send_wr *wr, void *qend, + const struct ib_send_wr *wr, void *qend, struct mlx5_ib_qp *qp, int *size) { void *seg = eseg; @@ -3582,7 +3561,7 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); @@ -3730,9 +3709,9 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) static int set_reg_umr_segment(struct mlx5_ib_dev *dev, struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int atomic) + const struct ib_send_wr *wr, int atomic) { - struct mlx5_umr_wr *umrwr = umr_wr(wr); + const struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(umr, 0, sizeof(*umr)); @@ -3803,9 +3782,10 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, + const struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = umr_wr(wr); + const struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) @@ -3854,7 +3834,7 @@ static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, seg += mr_list_size; } -static __be32 send_ieth(struct ib_send_wr *wr) +static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { case IB_WR_SEND_WITH_IMM: @@ -3886,7 +3866,7 @@ static u8 wq_sig(void *wqe) return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); } -static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, +static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, void *wqe, int *sz) { struct mlx5_wqe_inline_seg *seg; @@ -4032,7 +4012,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(struct ib_sig_handover_wr *wr, +static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, struct mlx5_ib_qp *qp, void **seg, int *size) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; @@ -4134,7 +4114,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_sig_handover_wr *wr, u32 size, + const struct ib_sig_handover_wr *wr, u32 size, u32 length, u32 pdn) { struct ib_mr *sig_mr = wr->sig_mr; @@ -4165,10 +4145,10 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, } -static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, - void **seg, int *size) +static int set_sig_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size) { - struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); u32 pdn = get_pd(qp)->pdn; u32 xlt_size; @@ -4243,7 +4223,7 @@ static int set_psv_wr(struct ib_sig_domain *domain, } static int set_reg_wr(struct mlx5_ib_qp *qp, - struct ib_reg_wr *wr, + const struct ib_reg_wr *wr, void **seg, int *size) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); @@ -4314,10 +4294,10 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } } -static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, +static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, - struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) + const struct ib_send_wr *wr, unsigned *idx, + int *size, int nreq, bool send_signaled, bool solicited) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; @@ -4328,10 +4308,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (wr->send_flags & IB_SEND_SIGNALED ? - MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - MLX5_WQE_CTRL_SOLICITED : 0); + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; @@ -4339,6 +4317,16 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, return 0; } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned *idx, + int *size, int nreq) +{ + return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, + wr->send_flags & IB_SEND_SIGNALED, + wr->send_flags & IB_SEND_SOLICITED); +} + static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, @@ -4360,9 +4348,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.w_list[idx].next = qp->sq.cur_post; } - -int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain) { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); @@ -4393,7 +4380,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4498,10 +4485,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * SET_PSV WQEs are not signaled and solicited * on error */ - wr->send_flags &= ~IB_SEND_SIGNALED; - wr->send_flags |= IB_SEND_SOLICITED; - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, nreq, false, true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4520,8 +4505,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, MLX5_OPCODE_SET_PSV); - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, nreq); + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, nreq, false, true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4690,13 +4675,19 @@ out: return err; } +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return _mlx5_ib_post_send(ibqp, wr, bad_wr, false); +} + static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) { sig->signature = calc_sig(sig, size); } -int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) { struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_data_seg *scat; @@ -4714,7 +4705,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) { err = -EIO; *bad_wr = wr; nreq = 0; @@ -4776,6 +4767,12 @@ out: return err; } +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false); +} + static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) { switch (mlx5_state) { @@ -5365,7 +5362,9 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev, rwq->wqe_count = ucmd->rq_wqe_count; rwq->wqe_shift = ucmd->rq_wqe_shift; - rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift); + if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size)) + return -EINVAL; + rwq->log_rq_stride = rwq->wqe_shift; rwq->log_rq_size = ilog2(rwq->wqe_count); return 0; @@ -5697,3 +5696,132 @@ out: kvfree(in); return err; } + +struct mlx5_ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe, + struct mlx5_ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* This function returns only once the drained WR was completed */ +static void handle_drain_completion(struct ib_cq *cq, + struct mlx5_ib_drain_cqe *sdrain, + struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + if (cq->poll_ctx == IB_POLL_DIRECT) { + while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0) + ib_process_cq_direct(cq, -1); + return; + } + + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + struct mlx5_ib_cq *mcq = to_mcq(cq); + bool triggered = false; + unsigned long flags; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + /* Make sure that the CQ handler won't run if wasn't run yet */ + if (!mcq->mcq.reset_notify_added) + mcq->mcq.reset_notify_added = 1; + else + triggered = true; + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + if (triggered) { + /* Wait for any scheduled/running task to be ended */ + switch (cq->poll_ctx) { + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + irq_poll_enable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + cancel_work_sync(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + } + + /* Run the CQ handler - this makes sure that the drain WR will + * be processed if wasn't processed yet. + */ + mcq->mcq.comp(&mcq->mcq); + } + + wait_for_completion(&sdrain->done); +} + +void mlx5_ib_drain_sq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->send_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe sdrain; + const struct ib_send_wr *bad_swr; + struct ib_rdma_wr swr = { + .wr = { + .next = NULL, + { .wr_cqe = &sdrain.cqe, }, + .opcode = IB_WR_RDMA_WRITE, + }, + }; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + sdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &sdrain, dev); +} + +void mlx5_ib_drain_rq(struct ib_qp *qp) +{ + struct ib_cq *cq = qp->recv_cq; + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct mlx5_ib_drain_cqe rdrain; + struct ib_recv_wr rwr = {}; + const struct ib_recv_wr *bad_rwr; + int ret; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_core_dev *mdev = dev->mdev; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = mlx5_ib_drain_qp_done; + init_completion(&rdrain.done); + + ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + handle_drain_completion(cq, &rdrain, dev); +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index f5de5adc9b1a..d359fecf7a5b 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -446,8 +446,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) spin_unlock(&srq->lock); } -int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mlx5_ib_srq *srq = to_msrq(ibsrq); struct mlx5_wqe_srq_next_seg *next; diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index e7f6223e9c60..0823c0bc7e73 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -281,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, header->grh.flow_label = ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); header->grh.hop_limit = ah->av->hop_limit; - ib_get_cached_gid(&dev->ib_dev, - be32_to_cpu(ah->av->port_pd) >> 24, - ah->av->gid_index % dev->limits.gid_table_len, - &header->grh.source_gid, NULL); + header->grh.source_gid = ah->ibah.sgid_attr->gid; memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); } diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 5508afbf1c67..220a3e4717a3 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -519,10 +519,10 @@ int mthca_max_srq_sge(struct mthca_dev *dev); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); -int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); @@ -530,14 +530,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m struct ib_qp_init_attr *qp_init_attr); int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 541f237965c7..0d3473b4596e 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; props->max_qp_wr = mdev->limits.max_wqes; - props->max_sge = mdev->limits.max_sg; - props->max_sge_rd = props->max_sge; + props->max_send_sge = mdev->limits.max_sg; + props->max_recv_sge = mdev->limits.max_sg; + props->max_sge_rd = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; @@ -448,7 +449,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, int err; if (init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); + return ERR_PTR(-EOPNOTSUPP); srq = kmalloc(sizeof *srq, GFP_KERNEL); if (!srq) diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index af1c49d70b89..3d37f2373d63 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1488,7 +1488,7 @@ void mthca_free_qp(struct mthca_dev *dev, /* Create UD header for an MLX send and build a data segment for it */ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, struct ib_ud_wr *wr, + int ind, const struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { @@ -1581,7 +1581,7 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, } static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, - struct ib_atomic_wr *wr) + const struct ib_atomic_wr *wr) { if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->swap); @@ -1594,7 +1594,7 @@ static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, } static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { useg->lkey = cpu_to_be32(to_mah(wr->ah)->key); useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma); @@ -1604,15 +1604,15 @@ static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, } static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, - struct ib_ud_wr *wr) + const struct ib_ud_wr *wr) { memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE); useg->dqpn = cpu_to_be32(wr->remote_qpn); useg->qkey = cpu_to_be32(wr->remote_qkey); } -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1814,8 +1814,8 @@ out: return err; } -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1925,8 +1925,8 @@ out: return err; } -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -2165,8 +2165,8 @@ out: return err; } -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index f79732bc73b4..9a3fc6fb0d7e 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -472,8 +472,8 @@ void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) spin_unlock(&srq->lock); } -int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); @@ -572,8 +572,8 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, return err; } -int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 00c27291dc26..bedaa02749fb 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -159,7 +159,7 @@ do { \ #define NES_EVENT_TIMEOUT 1200000 #else -#define nes_debug(level, fmt, args...) +#define nes_debug(level, fmt, args...) no_printk(fmt, ##args) #define assert(expr) do {} while (0) #define NES_EVENT_TIMEOUT 100000 diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6cdfbf8c5674..2b67ace5b614 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -58,6 +58,7 @@ #include <net/neighbour.h> #include <net/route.h> #include <net/ip_fib.h> +#include <net/secure_seq.h> #include <net/tcp.h> #include <linux/fcntl.h> @@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, struct nes_cm_listener *listener) { struct nes_cm_node *cm_node; - struct timespec ts; int oldarpindex = 0; int arpindex = 0; struct nes_device *nesdev; @@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> NES_CM_DEFAULT_RCV_WND_SCALE; - ts = current_kernel_time(); - cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr), + htonl(cm_node->rem_addr), + htons(cm_node->loc_port), + htons(cm_node->rem_port)); cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; cm_node->tcp_cntxt.rcv_nxt = 0; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 18a7de1c3923..bd0675d8f298 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -70,8 +70,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); static void nes_terminate_start_timer(struct nes_qp *nesqp); -#ifdef CONFIG_INFINIBAND_NES_DEBUG -static unsigned char *nes_iwarp_state_str[] = { +static const char *const nes_iwarp_state_str[] = { "Non-Existent", "Idle", "RTS", @@ -82,7 +81,7 @@ static unsigned char *nes_iwarp_state_str[] = { "RSVD2", }; -static unsigned char *nes_tcp_state_str[] = { +static const char *const nes_tcp_state_str[] = { "Non-Existent", "Closed", "Listen", @@ -100,7 +99,6 @@ static unsigned char *nes_tcp_state_str[] = { "RSVD3", "RSVD4", }; -#endif static inline void print_ip(struct nes_cm_node *cm_node) { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 32f26556c808..6940c7215961 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop props->max_mr_size = 0x80000000; props->max_qp = nesibdev->max_qp; props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; - props->max_sge = nesdev->nesadapter->max_sge; + props->max_send_sge = nesdev->nesadapter->max_sge; + props->max_recv_sge = nesdev->nesadapter->max_sge; props->max_cq = nesibdev->max_cq; props->max_cqe = nesdev->nesadapter->max_cqe; props->max_mr = nesibdev->max_mr; @@ -754,26 +755,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) /** - * nes_create_ah - */ -static struct ib_ah *nes_create_ah(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) -{ - return ERR_PTR(-ENOSYS); -} - - -/** - * nes_destroy_ah - */ -static int nes_destroy_ah(struct ib_ah *ah) -{ - return -ENOSYS; -} - - -/** * nes_get_encoded_size */ static inline u8 nes_get_encoded_size(int *size) @@ -3004,42 +2985,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return err; } - -/** - * nes_muticast_attach - */ -static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - - -/** - * nes_multicast_detach - */ -static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - - -/** - * nes_process_mad - */ -static int nes_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - nes_debug(NES_DBG_INIT, "\n"); - return -ENOSYS; -} - static inline void -fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey) +fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr, + u32 uselkey) { int sge_index; int total_payload_length = 0; @@ -3065,8 +3013,8 @@ fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselke /** * nes_post_send */ -static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, - struct ib_send_wr **bad_wr) +static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr, + const struct ib_send_wr **bad_wr) { u64 u64temp; unsigned long flags = 0; @@ -3327,8 +3275,8 @@ out: /** * nes_post_recv */ -static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, - struct ib_recv_wr **bad_wr) +static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr, + const struct ib_recv_wr **bad_wr) { u64 u64temp; unsigned long flags = 0; @@ -3735,8 +3683,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.mmap = nes_mmap; nesibdev->ibdev.alloc_pd = nes_alloc_pd; nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; - nesibdev->ibdev.create_ah = nes_create_ah; - nesibdev->ibdev.destroy_ah = nes_destroy_ah; nesibdev->ibdev.create_qp = nes_create_qp; nesibdev->ibdev.modify_qp = nes_modify_qp; nesibdev->ibdev.query_qp = nes_query_qp; @@ -3753,10 +3699,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.alloc_mr = nes_alloc_mr; nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - nesibdev->ibdev.attach_mcast = nes_multicast_attach; - nesibdev->ibdev.detach_mcast = nes_multicast_detach; - nesibdev->ibdev.process_mad = nes_process_mad; - nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; nesibdev->ibdev.post_send = nes_post_send; nesibdev->ibdev.post_recv = nes_post_recv; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 3897b64532e1..58188fe5aed2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type) } static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, - struct rdma_ah_attr *attr, union ib_gid *sgid, + struct rdma_ah_attr *attr, const union ib_gid *sgid, int pdid, bool *isvlan, u16 vlan_tag) { int status; @@ -164,17 +164,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, struct ocrdma_ah *ah; bool isvlan = false; u16 vlan_tag = 0xffff; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); - const struct ib_global_route *grh; - union ib_gid sgid; if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) return ERR_PTR(-EINVAL); - grh = rdma_ah_read_grh(attr); if (atomic_cmpxchg(&dev->update_sl, 1, 0)) ocrdma_init_service_level(dev); @@ -186,20 +183,15 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (status) goto av_err; - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid, - &sgid_attr); - if (status) { - pr_err("%s(): Failed to query sgid, status = %d\n", - __func__, status); - goto av_conf_err; - } - if (is_vlan_dev(sgid_attr.ndev)) - vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); - dev_put(sgid_attr.ndev); + sgid_attr = attr->grh.sgid_attr; + if (is_vlan_dev(sgid_attr->ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev); + /* Get network header type for this GID */ - ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + ah->hdr_type = rdma_gid_attr_network_type(sgid_attr); - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); + status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id, + &isvlan, vlan_tag); if (status) goto av_conf_err; @@ -262,12 +254,6 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) -{ - /* modify_ah is unsupported */ - return -ENOSYS; -} - int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 1a65c47945aa..c0c32c9b80ae 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -55,7 +55,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, struct ib_udata *udata); int ocrdma_destroy_ah(struct ib_ah *ah); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, int process_mad_flags, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 6c136e5017fe..e578281471af 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1365,8 +1365,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & OCRDMA_HBA_ATTRB_PTNUM_MASK) >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; - strncpy(dev->model_number, - hba_attribs->controller_model_number, 31); + strlcpy(dev->model_number, + hba_attribs->controller_model_number, + sizeof(dev->model_number)); } dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa); free_mqe: @@ -2494,8 +2495,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, { int status; struct rdma_ah_attr *ah_attr = &attrs->ah_attr; - union ib_gid sgid; - struct ib_gid_attr sgid_attr; + const struct ib_gid_attr *sgid_attr; u32 vlan_id = 0xFFFF; u8 mac_addr[6], hdr_type; union { @@ -2525,25 +2525,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0], sizeof(cmd->params.dgid)); - status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, - &sgid, &sgid_attr); - if (!status) { - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); - dev_put(sgid_attr.ndev); - } + sgid_attr = ah_attr->grh.sgid_attr; + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); + memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN); qp->sgid_idx = grh->sgid_index; - memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); + memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0], + sizeof(cmd->params.sgid)); status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]); if (status) return status; + cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | (mac_addr[2] << 16) | (mac_addr[3] << 24); - hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); + hdr_type = rdma_gid_attr_network_type(sgid_attr); if (hdr_type == RDMA_NETWORK_IPV4) { - rdma_gid2ip(&sgid_addr._sockaddr, &sgid); + rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid); rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid); memcpy(&cmd->params.dgid[0], &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 5962c0ed9847..7832ee3e0c84 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -176,7 +176,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.create_ah = ocrdma_create_ah; dev->ibdev.destroy_ah = ocrdma_destroy_ah; dev->ibdev.query_ah = ocrdma_query_ah; - dev->ibdev.modify_ah = ocrdma_modify_ah; dev->ibdev.poll_cq = ocrdma_poll_cq; dev->ibdev.post_send = ocrdma_post_send; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 82e20fc32890..c158ca9fde6d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_send_sge = dev->attr.max_send_sge; + attr->max_recv_sge = dev->attr.max_recv_sge; attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; @@ -196,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev, props->sm_lid = 0; props->sm_sl = 0; props->state = port_state; - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | - IB_PORT_IP_BASED_GIDS; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP; + props->ip_gids = true; props->gid_tbl_len = OCRDMA_MAX_SGID; props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; @@ -1774,13 +1774,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp) * protect against proessing in-flight CQEs for this QP. */ spin_lock_irqsave(&qp->sq_cq->cq_lock, flags); - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) { spin_lock(&qp->rq_cq->cq_lock); - - ocrdma_del_qpn_map(dev, qp); - - if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + ocrdma_del_qpn_map(dev, qp); spin_unlock(&qp->rq_cq->cq_lock); + } else { + ocrdma_del_qpn_map(dev, qp); + } spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags); if (!pd->uctx) { @@ -1953,7 +1953,7 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq) /* unprivileged verbs and their support functions. */ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct ocrdma_ewqe_ud_hdr *ud_hdr = (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); @@ -2000,7 +2000,7 @@ static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge) static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, struct ocrdma_sge *sge, - struct ib_send_wr *wr, u32 wqe_size) + const struct ib_send_wr *wr, u32 wqe_size) { int i; char *dpp_addr; @@ -2038,7 +2038,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, } static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int status; struct ocrdma_sge *sge; @@ -2057,7 +2057,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, } static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { int status; struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); @@ -2075,7 +2075,7 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, } static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); struct ocrdma_sge *sge = ext_rw + 1; @@ -2105,7 +2105,7 @@ static int get_encoded_page_size(int pg_sz) static int ocrdma_build_reg(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { u64 fbo; struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); @@ -2166,8 +2166,8 @@ static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) iowrite32(val, qp->sq_db); } -int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int status = 0; struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); @@ -2278,8 +2278,8 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) iowrite32(val, qp->rq_db); } -static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, - u16 tag) +static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, + const struct ib_recv_wr *wr, u16 tag) { u32 wqe_size = 0; struct ocrdma_sge *sge; @@ -2299,8 +2299,8 @@ static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, ocrdma_cpu_to_le32(rqe, wqe_size); } -int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int status = 0; unsigned long flags; @@ -2369,8 +2369,8 @@ static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET); } -int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int status = 0; unsigned long flags; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 9a9971708646..b69cfdce7970 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -43,10 +43,10 @@ #ifndef __OCRDMA_VERBS_H__ #define __OCRDMA_VERBS_H__ -int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, - struct ib_send_wr **bad_wr); -int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, - struct ib_recv_wr **bad_wr); +int ocrdma_post_send(struct ib_qp *, const struct ib_send_wr *, + const struct ib_send_wr **bad_wr); +int ocrdma_post_recv(struct ib_qp *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_wr); int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); @@ -100,8 +100,8 @@ int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); int ocrdma_destroy_srq(struct ib_srq *); -int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *, - struct ib_recv_wr **bad_recv_wr); +int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_recv_wr); int ocrdma_dereg_mr(struct ib_mr *); struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ad22b32bbd9c..a0af6d424aed 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -191,6 +191,11 @@ static int qedr_register_device(struct qedr_dev *dev) QEDR_UVERBS(MODIFY_QP) | QEDR_UVERBS(QUERY_QP) | QEDR_UVERBS(DESTROY_QP) | + QEDR_UVERBS(CREATE_SRQ) | + QEDR_UVERBS(DESTROY_SRQ) | + QEDR_UVERBS(QUERY_SRQ) | + QEDR_UVERBS(MODIFY_SRQ) | + QEDR_UVERBS(POST_SRQ_RECV) | QEDR_UVERBS(REG_MR) | QEDR_UVERBS(DEREG_MR) | QEDR_UVERBS(POLL_CQ) | @@ -229,6 +234,11 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.query_qp = qedr_query_qp; dev->ibdev.destroy_qp = qedr_destroy_qp; + dev->ibdev.create_srq = qedr_create_srq; + dev->ibdev.destroy_srq = qedr_destroy_srq; + dev->ibdev.modify_srq = qedr_modify_srq; + dev->ibdev.query_srq = qedr_query_srq; + dev->ibdev.post_srq_recv = qedr_post_srq_recv; dev->ibdev.query_pkey = qedr_query_pkey; dev->ibdev.create_ah = qedr_create_ah; @@ -325,8 +335,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev) spin_lock_init(&dev->sgid_lock); if (IS_IWARP(dev)) { - spin_lock_init(&dev->idr_lock); - idr_init(&dev->qpidr); + spin_lock_init(&dev->qpidr.idr_lock); + idr_init(&dev->qpidr.idr); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } @@ -653,42 +663,70 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) #define EVENT_TYPE_NOT_DEFINED 0 #define EVENT_TYPE_CQ 1 #define EVENT_TYPE_QP 2 +#define EVENT_TYPE_SRQ 3 struct qedr_dev *dev = (struct qedr_dev *)context; struct regpair *async_handle = (struct regpair *)fw_handle; u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo; u8 event_type = EVENT_TYPE_NOT_DEFINED; struct ib_event event; + struct ib_srq *ibsrq; + struct qedr_srq *srq; + unsigned long flags; struct ib_cq *ibcq; struct ib_qp *ibqp; struct qedr_cq *cq; struct qedr_qp *qp; + u16 srq_id; - switch (e_code) { - case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: - event.event = IB_EVENT_CQ_ERR; - event_type = EVENT_TYPE_CQ; - break; - case ROCE_ASYNC_EVENT_SQ_DRAINED: - event.event = IB_EVENT_SQ_DRAINED; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: - event.event = IB_EVENT_QP_FATAL; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: - event.event = IB_EVENT_QP_REQ_ERR; - event_type = EVENT_TYPE_QP; - break; - case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: - event.event = IB_EVENT_QP_ACCESS_ERR; - event_type = EVENT_TYPE_QP; - break; - default: + if (IS_ROCE(dev)) { + switch (e_code) { + case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + case ROCE_ASYNC_EVENT_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: + event.event = IB_EVENT_QP_FATAL; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: + event.event = IB_EVENT_QP_REQ_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + event_type = EVENT_TYPE_SRQ; + break; + case ROCE_ASYNC_EVENT_SRQ_EMPTY: + event.event = IB_EVENT_SRQ_ERR; + event_type = EVENT_TYPE_SRQ; + break; + default: + DP_ERR(dev, "unsupported event %d on handle=%llx\n", + e_code, roce_handle64); + } + } else { + switch (e_code) { + case QED_IWARP_EVENT_SRQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + event_type = EVENT_TYPE_SRQ; + break; + case QED_IWARP_EVENT_SRQ_EMPTY: + event.event = IB_EVENT_SRQ_ERR; + event_type = EVENT_TYPE_SRQ; + break; + default: DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, roce_handle64); + } } - switch (event_type) { case EVENT_TYPE_CQ: cq = (struct qedr_cq *)(uintptr_t)roce_handle64; @@ -722,6 +760,25 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) } DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp); break; + case EVENT_TYPE_SRQ: + srq_id = (u16)roce_handle64; + spin_lock_irqsave(&dev->srqidr.idr_lock, flags); + srq = idr_find(&dev->srqidr.idr, srq_id); + if (srq) { + ibsrq = &srq->ibsrq; + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + ibsrq->event_handler(&event, + ibsrq->srq_context); + } + } else { + DP_NOTICE(dev, + "SRQ event with NULL pointer ibsrq. Handle=%llx\n", + roce_handle64); + } + spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags); + DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq); default: break; } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 86d4511e0d75..a2d708dceb8d 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -58,6 +58,7 @@ #define QEDR_MSG_RQ " RQ" #define QEDR_MSG_SQ " SQ" #define QEDR_MSG_QP " QP" +#define QEDR_MSG_SRQ " SRQ" #define QEDR_MSG_GSI " GSI" #define QEDR_MSG_IWARP " IW" @@ -122,6 +123,11 @@ struct qedr_device_attr { #define QEDR_ENET_STATE_BIT (0) +struct qedr_idr { + spinlock_t idr_lock; /* Protect idr data-structure */ + struct idr idr; +}; + struct qedr_dev { struct ib_device ibdev; struct qed_dev *cdev; @@ -165,8 +171,8 @@ struct qedr_dev { struct qedr_cq *gsi_rqcq; struct qedr_qp *gsi_qp; enum qed_rdma_type rdma_type; - spinlock_t idr_lock; /* Protect qpidr data-structure */ - struct idr qpidr; + struct qedr_idr qpidr; + struct qedr_idr srqidr; struct workqueue_struct *iwarp_wq; u16 iwarp_max_mtu; @@ -337,6 +343,34 @@ struct qedr_qp_hwq_info { qed_chain_get_capacity(p_info->pbl) \ } while (0) +struct qedr_srq_hwq_info { + u32 max_sges; + u32 max_wr; + struct qed_chain pbl; + u64 p_phys_addr_tbl; + u32 wqe_prod; + u32 sge_prod; + u32 wr_prod_cnt; + u32 wr_cons_cnt; + u32 num_elems; + + u32 *virt_prod_pair_addr; + dma_addr_t phy_prod_pair_addr; +}; + +struct qedr_srq { + struct ib_srq ibsrq; + struct qedr_dev *dev; + + struct qedr_userq usrq; + struct qedr_srq_hwq_info hw_srq; + struct ib_umem *prod_umem; + u16 srq_id; + u32 srq_limit; + /* lock to protect srq recv post */ + spinlock_t lock; +}; + enum qedr_qp_err_bitmap { QEDR_QP_ERR_SQ_FULL = 1, QEDR_QP_ERR_RQ_FULL = 2, @@ -538,4 +572,9 @@ static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr) { return container_of(ibmr, struct qedr_mr, ibmr); } + +static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct qedr_srq, ibsrq); +} #endif diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h index 7e1f7021396a..228dd7d49622 100644 --- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h +++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h @@ -161,12 +161,23 @@ struct rdma_rq_sge { #define RDMA_RQ_SGE_L_KEY_HI_SHIFT 29 }; +struct rdma_srq_wqe_header { + struct regpair wr_id; + u8 num_sges /* number of SGEs in WQE */; + u8 reserved2[7]; +}; + struct rdma_srq_sge { struct regpair addr; __le32 length; __le32 l_key; }; +union rdma_srq_elm { + struct rdma_srq_wqe_header header; + struct rdma_srq_sge sge; +}; + /* Rdma doorbell data for flags update */ struct rdma_pwm_flags_data { __le16 icid; /* internal CID */ diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 26dc374787f7..505fa3648762 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -491,7 +491,7 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = idr_find(&dev->qpidr, conn_param->qpn); + qp = idr_find(&dev->qpidr.idr, conn_param->qpn); laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; @@ -679,7 +679,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = idr_find(&dev->qpidr, conn_param->qpn); + qp = idr_find(&dev->qpidr.idr, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; @@ -737,9 +737,9 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) struct qedr_qp *qp = get_qedr_qp(ibqp); if (atomic_dec_and_test(&qp->refcnt)) { - spin_lock_irq(&qp->dev->idr_lock); - idr_remove(&qp->dev->qpidr, qp->qp_id); - spin_unlock_irq(&qp->dev->idr_lock); + spin_lock_irq(&qp->dev->qpidr.idr_lock); + idr_remove(&qp->dev->qpidr.idr, qp->qp_id); + spin_unlock_irq(&qp->dev->qpidr.idr_lock); kfree(qp); } } @@ -748,5 +748,5 @@ struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) { struct qedr_dev *dev = get_qedr_dev(ibdev); - return idr_find(&dev->qpidr, qpn); + return idr_find(&dev->qpidr.idr, qpn); } diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 0f14e687bb91..85578887421b 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -380,18 +380,17 @@ int qedr_destroy_gsi_qp(struct qedr_dev *dev) #define QEDR_GSI_QPN (1) static inline int qedr_gsi_build_header(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_send_wr *swr, + const struct ib_send_wr *swr, struct ib_ud_header *udh, int *roce_mode) { bool has_vlan = false, has_grh_ipv6 = true; struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); - union ib_gid sgid; + const struct ib_gid_attr *sgid_attr = grh->sgid_attr; int send_size = 0; u16 vlan_id = 0; u16 ether_type; - struct ib_gid_attr sgid_attr; int rc; int ip_ver = 0; @@ -402,28 +401,16 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, for (i = 0; i < swr->num_sge; ++i) send_size += swr->sg_list[i].length; - rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr), - grh->sgid_index, &sgid, &sgid_attr); - if (rc) { - DP_ERR(dev, - "gsi post send: failed to get cached GID (port=%d, ix=%d)\n", - rdma_ah_get_port_num(ah_attr), - grh->sgid_index); - return rc; - } - - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev); if (vlan_id < VLAN_CFI_MASK) has_vlan = true; - dev_put(sgid_attr.ndev); - - has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); + has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); if (!has_udp) { /* RoCE v1 */ ether_type = ETH_P_IBOE; *roce_mode = ROCE_V1; - } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) { /* RoCE v2 IPv4 */ ip_ver = 4; ether_type = ETH_P_IP; @@ -471,7 +458,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->grh.flow_label = grh->flow_label; udh->grh.hop_limit = grh->hop_limit; udh->grh.destination_gid = grh->dgid; - memcpy(&udh->grh.source_gid.raw, &sgid.raw, + memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw, sizeof(udh->grh.source_gid.raw)); } else { /* IPv4 header */ @@ -482,7 +469,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, udh->ip4.frag_off = htons(IP_DF); udh->ip4.ttl = grh->hop_limit; - ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw); udh->ip4.saddr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); udh->ip4.daddr = ipv4_addr; @@ -501,7 +488,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, static inline int qedr_gsi_build_packet(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_send_wr *swr, + const struct ib_send_wr *swr, struct qed_roce_ll2_packet **p_packet) { u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE]; @@ -550,8 +537,8 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev, return 0; } -int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qed_roce_ll2_packet *pkt = NULL; struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -620,8 +607,8 @@ err: return rc; } -int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h index a55916323ea9..d46dcd3f6424 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.h +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h @@ -46,10 +46,10 @@ static inline u32 qedr_get_ipv4_from_gid(const u8 *gid) /* RDMA CM */ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); +int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs, struct qedr_qp *qp); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f07b8df96f43..8cc3df24e04e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,10 @@ #include <rdma/qedr-abi.h> #include "qedr_roce_cm.h" +#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) +#define RDMA_MAX_SGE_PER_SRQ (4) +#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1) + #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, @@ -84,6 +88,19 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, return 0; } +int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_device_attr *qattr = &dev->attr; + struct qedr_srq *srq = get_qedr_srq(ibsrq); + + srq_attr->srq_limit = srq->srq_limit; + srq_attr->max_wr = qattr->max_srq_wr; + srq_attr->max_sge = qattr->max_sge; + + return 0; +} + int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata) { @@ -112,7 +129,8 @@ int qedr_query_device(struct ib_device *ibdev, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = qattr->max_sge; + attr->max_send_sge = qattr->max_sge; + attr->max_recv_sge = qattr->max_sge; attr->max_sge_rd = qattr->max_sge; attr->max_cq = qattr->max_cq; attr->max_cqe = qattr->max_cqe; @@ -224,7 +242,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->lmc = 0; attr->sm_lid = 0; attr->sm_sl = 0; - attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; + attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { attr->gid_tbl_len = 1; attr->pkey_tbl_len = 1; @@ -1075,27 +1093,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, struct qed_rdma_modify_qp_in_params *qp_params) { + const struct ib_gid_attr *gid_attr; enum rdma_network_type nw_type; - struct ib_gid_attr gid_attr; const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); - union ib_gid gid; u32 ipv4_addr; - int rc = 0; int i; - rc = ib_get_cached_gid(ibqp->device, - rdma_ah_get_port_num(&attr->ah_attr), - grh->sgid_index, &gid, &gid_attr); - if (rc) - return rc; - - qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); + gid_attr = grh->sgid_attr; + qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev); - dev_put(gid_attr.ndev); - nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); + nw_type = rdma_gid_attr_network_type(gid_attr); switch (nw_type) { case RDMA_NETWORK_IPV6: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1105,7 +1115,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; case RDMA_NETWORK_IB: - memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], &grh->dgid, @@ -1115,7 +1125,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, case RDMA_NETWORK_IPV4: memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); - ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); + ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw); qp_params->sgid.ipv4_addr = ipv4_addr; ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); @@ -1189,6 +1199,21 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, return 0; } +static int qedr_copy_srq_uresp(struct qedr_dev *dev, + struct qedr_srq *srq, struct ib_udata *udata) +{ + struct qedr_create_srq_uresp uresp = {}; + int rc; + + uresp.srq_id = srq->srq_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + DP_ERR(dev, "create srq: problem copying data to user space\n"); + + return rc; +} + static void qedr_copy_rq_uresp(struct qedr_dev *dev, struct qedr_create_qp_uresp *uresp, struct qedr_qp *qp) @@ -1255,13 +1280,18 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->state = QED_ROCE_QP_STATE_RESET; qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; qp->sq_cq = get_qedr_cq(attrs->send_cq); - qp->rq_cq = get_qedr_cq(attrs->recv_cq); qp->dev = dev; - qp->rq.max_sges = attrs->cap.max_recv_sge; - DP_DEBUG(dev, QEDR_MSG_QP, - "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", - qp->rq.max_sges, qp->rq_cq->icid); + if (attrs->srq) { + qp->srq = get_qedr_srq(attrs->srq); + } else { + qp->rq_cq = get_qedr_cq(attrs->recv_cq); + qp->rq.max_sges = attrs->cap.max_recv_sge; + DP_DEBUG(dev, QEDR_MSG_QP, + "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", + qp->rq.max_sges, qp->rq_cq->icid); + } + DP_DEBUG(dev, QEDR_MSG_QP, "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n", pd->pd_id, qp->qp_type, qp->max_inline_data, @@ -1276,9 +1306,303 @@ static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid + 1; - qp->rq.db = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); - qp->rq.db_data.data.icid = qp->icid; + if (!qp->srq) { + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; + } +} + +static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct qedr_device_attr *qattr = &dev->attr; + + if (attrs->attr.max_wr > qattr->max_srq_wr) { + DP_ERR(dev, + "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n", + attrs->attr.max_wr, qattr->max_srq_wr); + return -EINVAL; + } + + if (attrs->attr.max_sge > qattr->max_sge) { + DP_ERR(dev, + "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", + attrs->attr.max_sge, qattr->max_sge); + return -EINVAL; + } + + return 0; +} + +static void qedr_free_srq_user_params(struct qedr_srq *srq) +{ + qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + ib_umem_release(srq->prod_umem); +} + +static void qedr_free_srq_kernel_params(struct qedr_srq *srq) +{ + struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq; + struct qedr_dev *dev = srq->dev; + + dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl); + + dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers), + hw_srq->virt_prod_pair_addr, + hw_srq->phy_prod_pair_addr); +} + +static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx, + struct qedr_srq *srq, + struct qedr_create_srq_ureq *ureq, + int access, int dmasync) +{ + struct scatterlist *sg; + int rc; + + rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr, + ureq->srq_len, access, dmasync, 1); + if (rc) + return rc; + + srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr, + sizeof(struct rdma_srq_producers), + access, dmasync); + if (IS_ERR(srq->prod_umem)) { + qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); + ib_umem_release(srq->usrq.umem); + DP_ERR(srq->dev, + "create srq: failed ib_umem_get for producer, got %ld\n", + PTR_ERR(srq->prod_umem)); + return PTR_ERR(srq->prod_umem); + } + + sg = srq->prod_umem->sg_head.sgl; + srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg); + + return 0; +} + +static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq, + struct qedr_dev *dev, + struct ib_srq_init_attr *init_attr) +{ + struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq; + dma_addr_t phy_prod_pair_addr; + u32 num_elems; + void *va; + int rc; + + va = dma_alloc_coherent(&dev->pdev->dev, + sizeof(struct rdma_srq_producers), + &phy_prod_pair_addr, GFP_KERNEL); + if (!va) { + DP_ERR(dev, + "create srq: failed to allocate dma memory for producer\n"); + return -ENOMEM; + } + + hw_srq->phy_prod_pair_addr = phy_prod_pair_addr; + hw_srq->virt_prod_pair_addr = va; + + num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE; + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + num_elems, + QEDR_SRQ_WQE_ELEM_SIZE, + &hw_srq->pbl, NULL); + if (rc) + goto err0; + + hw_srq->num_elems = num_elems; + + return 0; + +err0: + dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers), + va, phy_prod_pair_addr); + return rc; +} + +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, + void *ptr, u32 id); +static void qedr_idr_remove(struct qedr_dev *dev, + struct qedr_idr *qidr, u32 id); + +struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qed_rdma_destroy_srq_in_params destroy_in_params; + struct qed_rdma_create_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qed_rdma_create_srq_out_params out_params; + struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_create_srq_ureq ureq = {}; + u64 pbl_base_addr, phy_prod_pair_addr; + struct ib_ucontext *ib_ctx = NULL; + struct qedr_srq_hwq_info *hw_srq; + struct qedr_ucontext *ctx = NULL; + u32 page_cnt, page_size; + struct qedr_srq *srq; + int rc = 0; + + DP_DEBUG(dev, QEDR_MSG_QP, + "create SRQ called from %s (pd %p)\n", + (udata) ? "User lib" : "kernel", pd); + + rc = qedr_check_srq_params(ibpd, dev, init_attr, udata); + if (rc) + return ERR_PTR(-EINVAL); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->dev = dev; + hw_srq = &srq->hw_srq; + spin_lock_init(&srq->lock); + + hw_srq->max_wr = init_attr->attr.max_wr; + hw_srq->max_sges = init_attr->attr.max_sge; + + if (udata && ibpd->uobject && ibpd->uobject->context) { + ib_ctx = ibpd->uobject->context; + ctx = get_qedr_ucontext(ib_ctx); + + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + DP_ERR(dev, + "create srq: problem copying data from user space\n"); + goto err0; + } + + rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0); + if (rc) + goto err0; + + page_cnt = srq->usrq.pbl_info.num_pbes; + pbl_base_addr = srq->usrq.pbl_tbl->pa; + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = BIT(srq->usrq.umem->page_shift); + } else { + struct qed_chain *pbl; + + rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr); + if (rc) + goto err0; + + pbl = &hw_srq->pbl; + page_cnt = qed_chain_get_page_cnt(pbl); + pbl_base_addr = qed_chain_get_pbl_phys(pbl); + phy_prod_pair_addr = hw_srq->phy_prod_pair_addr; + page_size = QED_CHAIN_PAGE_SIZE; + } + + in_params.pd_id = pd->pd_id; + in_params.pbl_base_addr = pbl_base_addr; + in_params.prod_pair_addr = phy_prod_pair_addr; + in_params.num_pages = page_cnt; + in_params.page_size = page_size; + + rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); + if (rc) + goto err1; + + srq->srq_id = out_params.srq_id; + + if (udata) { + rc = qedr_copy_srq_uresp(dev, srq, udata); + if (rc) + goto err2; + } + + rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id); + if (rc) + goto err2; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "create srq: created srq with srq_id=0x%0x\n", srq->srq_id); + return &srq->ibsrq; + +err2: + destroy_in_params.srq_id = srq->srq_id; + + dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params); +err1: + if (udata) + qedr_free_srq_user_params(srq); + else + qedr_free_srq_kernel_params(srq); +err0: + kfree(srq); + + return ERR_PTR(-EFAULT); +} + +int qedr_destroy_srq(struct ib_srq *ibsrq) +{ + struct qed_rdma_destroy_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_srq *srq = get_qedr_srq(ibsrq); + + qedr_idr_remove(dev, &dev->srqidr, srq->srq_id); + in_params.srq_id = srq->srq_id; + dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); + + if (ibsrq->pd->uobject) + qedr_free_srq_user_params(srq); + else + qedr_free_srq_kernel_params(srq); + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "destroy srq: destroyed srq with srq_id=0x%0x\n", + srq->srq_id); + kfree(srq); + + return 0; +} + +int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct qed_rdma_modify_srq_in_params in_params = {}; + struct qedr_dev *dev = get_qedr_dev(ibsrq->device); + struct qedr_srq *srq = get_qedr_srq(ibsrq); + int rc; + + if (attr_mask & IB_SRQ_MAX_WR) { + DP_ERR(dev, + "modify srq: invalid attribute mask=0x%x specified for %p\n", + attr_mask, srq); + return -EINVAL; + } + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->hw_srq.max_wr) { + DP_ERR(dev, + "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n", + attr->srq_limit, srq->hw_srq.max_wr); + return -EINVAL; + } + + in_params.srq_id = srq->srq_id; + in_params.wqe_limit = attr->srq_limit; + rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params); + if (rc) + return rc; + } + + srq->srq_limit = attr->srq_limit; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id); + + return 0; } static inline void @@ -1299,9 +1623,17 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev, params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; params->stats_queue = 0; - params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; params->srq_id = 0; params->use_srq = false; + + if (!qp->srq) { + params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; + + } else { + params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; + params->srq_id = qp->srq->srq_id; + params->use_srq = true; + } } static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) @@ -1318,32 +1650,27 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); } -static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id) +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr, + void *ptr, u32 id) { int rc; - if (!rdma_protocol_iwarp(&dev->ibdev, 1)) - return 0; - idr_preload(GFP_KERNEL); - spin_lock_irq(&dev->idr_lock); + spin_lock_irq(&qidr->idr_lock); - rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC); + rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC); - spin_unlock_irq(&dev->idr_lock); + spin_unlock_irq(&qidr->idr_lock); idr_preload_end(); return rc < 0 ? rc : 0; } -static void qedr_idr_remove(struct qedr_dev *dev, u32 id) +static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id) { - if (!rdma_protocol_iwarp(&dev->ibdev, 1)) - return; - - spin_lock_irq(&dev->idr_lock); - idr_remove(&dev->qpidr, id); - spin_unlock_irq(&dev->idr_lock); + spin_lock_irq(&qidr->idr_lock); + idr_remove(&qidr->idr, id); + spin_unlock_irq(&qidr->idr_lock); } static inline void @@ -1356,9 +1683,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, &qp->usq.pbl_info, FW_PAGE_SHIFT); - - qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; - qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + if (!qp->srq) { + qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; + qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; + } qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, &qp->urq.pbl_info, FW_PAGE_SHIFT); @@ -1404,11 +1732,13 @@ static int qedr_create_user_qp(struct qedr_dev *dev, if (rc) return rc; - /* RQ - read access only (0), dma sync not required (0) */ - rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0, alloc_and_init); - if (rc) - return rc; + if (!qp->srq) { + /* RQ - read access only (0), dma sync not required (0) */ + rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, + ureq.rq_len, 0, 0, alloc_and_init); + if (rc) + return rc; + } memset(&in_params, 0, sizeof(in_params)); qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); @@ -1416,8 +1746,10 @@ static int qedr_create_user_qp(struct qedr_dev *dev, in_params.qp_handle_hi = ureq.qp_handle_hi; in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; - in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; - in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; + if (!qp->srq) { + in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; + in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; + } qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); @@ -1679,16 +2011,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, if (rc) return ERR_PTR(rc); - if (attrs->srq) - return ERR_PTR(-EINVAL); - DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", udata ? "user library" : "kernel", attrs->event_handler, pd, get_qedr_cq(attrs->send_cq), get_qedr_cq(attrs->send_cq)->icid, get_qedr_cq(attrs->recv_cq), - get_qedr_cq(attrs->recv_cq)->icid); + attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0); qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) { @@ -1715,9 +2044,11 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; - rc = qedr_idr_add(dev, qp, qp->qp_id); - if (rc) - goto err; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id); + if (rc) + goto err; + } return &qp->ibqp; @@ -2289,8 +2620,9 @@ int qedr_destroy_qp(struct ib_qp *ibqp) qedr_free_qp_resources(dev, qp); - if (atomic_dec_and_test(&qp->refcnt)) { - qedr_idr_remove(dev, qp->qp_id); + if (atomic_dec_and_test(&qp->refcnt) && + rdma_protocol_iwarp(&dev->ibdev, 1)) { + qedr_idr_remove(dev, &dev->qpidr, qp->qp_id); kfree(qp); } return rc; @@ -2305,7 +2637,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, if (!ah) return ERR_PTR(-ENOMEM); - ah->attr = *attr; + rdma_copy_ah_attr(&ah->attr, attr); return &ah->ibah; } @@ -2314,6 +2646,7 @@ int qedr_destroy_ah(struct ib_ah *ibah) { struct qedr_ah *ah = get_qedr_ah(ibah); + rdma_destroy_ah_attr(&ah->attr); kfree(ah); return 0; } @@ -2705,9 +3038,9 @@ static void swap_wqe_data64(u64 *p) static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, struct qedr_qp *qp, u8 *wqe_size, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr, u8 *bits, - u8 bit) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, + u8 *bits, u8 bit) { u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); char *seg_prt, *wqe; @@ -2790,7 +3123,7 @@ static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, } while (0) static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, - struct ib_send_wr *wr) + const struct ib_send_wr *wr) { u32 data_size = 0; int i; @@ -2814,8 +3147,8 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev, struct qedr_qp *qp, struct rdma_sq_rdma_wqe_1st *rwqe, struct rdma_sq_rdma_wqe_2nd *rwqe2, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); @@ -2837,8 +3170,8 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, struct qedr_qp *qp, struct rdma_sq_send_wqe_1st *swqe, struct rdma_sq_send_wqe_2st *swqe2, - struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { memset(swqe2, 0, sizeof(*swqe2)); if (wr->send_flags & IB_SEND_INLINE) { @@ -2854,7 +3187,7 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, static int qedr_prepare_reg(struct qedr_qp *qp, struct rdma_sq_fmr_wqe_1st *fwqe1, - struct ib_reg_wr *wr) + const struct ib_reg_wr *wr) { struct qedr_mr *mr = get_qedr_mr(wr->mr); struct rdma_sq_fmr_wqe_2nd *fwqe2; @@ -2916,7 +3249,8 @@ static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) } } -static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) +static inline bool qedr_can_post_send(struct qedr_qp *qp, + const struct ib_send_wr *wr) { int wq_is_full, err_wr, pbl_is_full; struct qedr_dev *dev = qp->dev; @@ -2953,8 +3287,8 @@ static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) return true; } -static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -3168,8 +3502,8 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rc; } -int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct qedr_dev *dev = get_qedr_dev(ibqp->device); struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -3234,8 +3568,104 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return rc; } -int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) +{ + u32 used; + + /* Calculate number of elements used based on producer + * count and consumer count and subtract it from max + * work request supported so that we get elements left. + */ + used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + + return hw_srq->max_wr - used; +} + +int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct qedr_srq *srq = get_qedr_srq(ibsrq); + struct qedr_srq_hwq_info *hw_srq; + struct qedr_dev *dev = srq->dev; + struct qed_chain *pbl; + unsigned long flags; + int status = 0; + u32 num_sge; + u32 offset; + + spin_lock_irqsave(&srq->lock, flags); + + hw_srq = &srq->hw_srq; + pbl = &srq->hw_srq.pbl; + while (wr) { + struct rdma_srq_wqe_header *hdr; + int i; + + if (!qedr_srq_elem_left(hw_srq) || + wr->num_sge > srq->hw_srq.max_sges) { + DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + wr->num_sge, srq->hw_srq.max_sges); + status = -ENOMEM; + *bad_wr = wr; + break; + } + + hdr = qed_chain_produce(pbl); + num_sge = wr->num_sge; + /* Set number of sge and work request id in header */ + SRQ_HDR_SET(hdr, wr->wr_id, num_sge); + + srq->hw_srq.wr_prod_cnt++; + hw_srq->wqe_prod++; + hw_srq->sge_prod++; + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n", + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl); + + /* Set SGE length, lkey and address */ + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, + wr->sg_list[i].length, wr->sg_list[i].lkey); + + DP_DEBUG(dev, QEDR_MSG_SRQ, + "[%d]: len %d key %x addr %x:%x\n", + i, srq_sge->length, srq_sge->l_key, + srq_sge->addr.hi, srq_sge->addr.lo); + hw_srq->sge_prod++; + } + + /* Flush WQE and SGE information before + * updating producer. + */ + wmb(); + + /* SRQ producer is 8 bytes. Need to update SGE producer index + * in first 4 bytes and need to update WQE producer in + * next 4 bytes. + */ + *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; + offset = offsetof(struct rdma_srq_producers, wqe_prod); + *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = + hw_srq->wqe_prod; + + /* Flush producer after updating it. */ + wmb(); + wr = wr->next; + } + + DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n", + qed_chain_get_elem_left(pbl)); + spin_unlock_irqrestore(&srq->lock, flags); + + return status; +} + +int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct qedr_qp *qp = get_qedr_qp(ibqp); struct qedr_dev *dev = qp->dev; @@ -3625,6 +4055,31 @@ static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, wc->wr_id = wr_id; } +static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + struct qedr_srq *srq = qp->srq; + u64 wr_id; + + wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi), + le32_to_cpu(resp->srq_wr_id.lo), u64); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->wr_id = wr_id; + wc->byte_len = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; + } else { + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + } + srq->hw_srq.wr_cons_cnt++; + + return 1; +} static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, struct qedr_cq *cq, struct ib_wc *wc, struct rdma_cqe_responder *resp) @@ -3674,6 +4129,19 @@ static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp, } } +static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, int num_entries, + struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + int cnt; + + cnt = process_resp_one_srq(dev, qp, cq, wc, resp); + consume_cqe(cq); + + return cnt; +} + static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, struct qedr_cq *cq, int num_entries, struct ib_wc *wc, struct rdma_cqe_responder *resp, @@ -3751,6 +4219,11 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, &cqe->resp, &update); break; + case RDMA_CQE_TYPE_RESPONDER_SRQ: + cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries, + wc, &cqe->resp); + update = 1; + break; case RDMA_CQE_TYPE_INVALID: default: DP_ERR(dev, "Error: invalid CQE type = %d\n", diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 2c57e4c592a6..0b7d0124b16c 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -66,6 +66,15 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); +struct ib_srq *qedr_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *attr, + struct ib_udata *udata); +int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); +int qedr_destroy_srq(struct ib_srq *ibsrq); +int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, struct ib_udata *udata); int qedr_destroy_ah(struct ib_ah *ibah); @@ -82,10 +91,10 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); -int qedr_post_send(struct ib_qp *, struct ib_send_wr *, - struct ib_send_wr **bad_wr); -int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *, - struct ib_recv_wr **bad_wr); +int qedr_post_send(struct ib_qp *, const struct ib_send_wr *, + const struct ib_send_wr **bad_wr); +int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, + const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 14b4057a2b8f..41babbc0db58 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_mr_size = ~0ULL; rdi->dparms.props.max_qp = ib_qib_max_qps; rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs; - rdi->dparms.props.max_sge = ib_qib_max_sges; + rdi->dparms.props.max_send_sge = ib_qib_max_sges; + rdi->dparms.props.max_recv_sge = ib_qib_max_sges; rdi->dparms.props.max_sge_rd = ib_qib_max_sges; rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index f9a46768a19a..666613eef88f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -78,9 +78,6 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ -#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ @@ -314,7 +311,7 @@ void qib_rc_rnr_retry(unsigned long arg); void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); -int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); +int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr); void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig index 29ab11c34f3f..d1dae2af4ca9 100644 --- a/drivers/infiniband/hw/usnic/Kconfig +++ b/drivers/infiniband/hw/usnic/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_USNIC tristate "Verbs support for Cisco VIC" depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU + depends on INFINIBAND_USER_ACCESS select ENIC select NET_VENDOR_CISCO select PCI_IOV - select INFINIBAND_USER_ACCESS ---help--- This is a low-level driver for Cisco's Virtual Interface Cards (VICs), including the VIC 1240 and 1280 cards. diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 995a26b65156..7875883621f4 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -92,8 +92,8 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) ufdev->pdev = pdev; ufdev->netdev = pci_get_drvdata(pdev); spin_lock_init(&ufdev->lock); - strncpy(ufdev->name, netdev_name(ufdev->netdev), - sizeof(ufdev->name) - 1); + BUILD_BUG_ON(sizeof(ufdev->name) != sizeof(ufdev->netdev->name)); + strcpy(ufdev->name, ufdev->netdev->name); return ufdev; } diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index 0b2cc4e79707..f0b71d593da5 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -57,7 +57,7 @@ struct usnic_fwd_dev { char mac[ETH_ALEN]; unsigned int mtu; __be32 inaddr; - char name[IFNAMSIZ+1]; + char name[IFNAMSIZ]; }; struct usnic_fwd_flow { diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a688a5669168..9973ac893635 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -666,7 +666,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr) usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); - usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + usnic_uiom_reg_release(mr->umem, ibmr->uobject->context); kfree(mr); return 0; } @@ -771,15 +771,15 @@ int usnic_ib_destroy_ah(struct ib_ah *ah) return -EINVAL; } -int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { usnic_dbg("\n"); return -EINVAL; } -int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { usnic_dbg("\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 1fda94425116..2a2c9beb715f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -80,10 +80,10 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct ib_udata *udata); int usnic_ib_destroy_ah(struct ib_ah *ah); -int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int usnic_ib_req_notify_cq(struct ib_cq *cq, diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 4381c0a9a873..9dd39daa602b 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -41,6 +41,7 @@ #include <linux/workqueue.h> #include <linux/list.h> #include <linux/pci.h> +#include <rdma/ib_verbs.h> #include "usnic_log.h" #include "usnic_uiom.h" @@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (dirty) + if (!PageDirty(page) && dirty) set_page_dirty_lock(page); put_page(page); usnic_dbg("pa: %pa\n", &pa); @@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, dma_addr_t pa; unsigned int gup_flags; + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) + return -EINVAL; + + if (!size) + return -EINVAL; + if (!can_do_mlock()) return -EPERM; @@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, down_write(¤t->mm->mmap_sem); - locked = npages + current->mm->locked_vm; + locked = npages + current->mm->pinned_vm; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = 0; while (npages) { - ret = get_user_pages(cur_base, + ret = get_user_pages_longterm(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags, page_list, NULL); @@ -186,7 +197,7 @@ out: if (ret < 0) usnic_uiom_put_pages(chunk_list, 0); else - current->mm->locked_vm = locked; + current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); free_page((unsigned long) page_list); @@ -420,18 +431,22 @@ out_free_uiomr: return ERR_PTR(err); } -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext) { + struct task_struct *task; struct mm_struct *mm; unsigned long diff; __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); - mm = get_task_mm(current); - if (!mm) { - kfree(uiomr); - return; - } + task = get_pid_task(ucontext->tgid, PIDTYPE_PID); + if (!task) + goto out; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; @@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ - if (closing) { + if (ucontext->closing) { if (!down_write_trylock(&mm->mmap_sem)) { INIT_WORK(&uiomr->work, usnic_uiom_reg_account); uiomr->mm = mm; @@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) } else down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); +out: kfree(uiomr); } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 431efe4143f4..8c096acff123 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -39,6 +39,8 @@ #include "usnic_uiom_interval_tree.h" +struct ib_ucontext; + #define USNIC_UIOM_READ (1) #define USNIC_UIOM_WRITE (2) @@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs); struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); -void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, + struct ib_ucontext *ucontext); int usnic_uiom_init(char *drv_name); void usnic_uiom_fini(void); #endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 44cb1cfba417..42b8685c997e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -378,11 +378,6 @@ static inline enum ib_port_speed pvrdma_port_speed_to_ib( return (enum ib_port_speed)speed; } -static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask) -{ - return attr_mask; -} - static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask) { return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index f95b97646c25..0f004c737620 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -278,19 +278,6 @@ int pvrdma_destroy_cq(struct ib_cq *cq) return ret; } -/** - * pvrdma_modify_cq - modify the CQ moderation parameters - * @ibcq: the CQ to modify - * @cq_count: number of CQEs that will trigger an event - * @cq_period: max period of time in usec before triggering an event - * - * @return: -EOPNOTSUPP as CQ resize is not supported. - */ -int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) { return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr( @@ -428,16 +415,3 @@ int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) /* Ensure we do not return errors from poll_cq */ return npolled; } - -/** - * pvrdma_resize_cq - resize CQ - * @ibcq: the completion queue - * @entries: CQ entries - * @udata: user data - * - * @return: -EOPNOTSUPP as CQ resize is not supported. - */ -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) -{ - return -EOPNOTSUPP; -} diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 0be33a81bbe6..a5719899f49a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); static struct workqueue_struct *event_wq; -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context); +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); static ssize_t show_hca(struct device *device, struct device_attribute *attr, @@ -216,8 +214,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.post_send = pvrdma_post_send; dev->ib_dev.post_recv = pvrdma_post_recv; dev->ib_dev.create_cq = pvrdma_create_cq; - dev->ib_dev.modify_cq = pvrdma_modify_cq; - dev->ib_dev.resize_cq = pvrdma_resize_cq; dev->ib_dev.destroy_cq = pvrdma_destroy_cq; dev->ib_dev.poll_cq = pvrdma_poll_cq; dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; @@ -261,7 +257,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.modify_srq = pvrdma_modify_srq; dev->ib_dev.query_srq = pvrdma_query_srq; dev->ib_dev.destroy_srq = pvrdma_destroy_srq; - dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv; dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, sizeof(struct pvrdma_srq *), @@ -650,13 +645,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, return 0; } -static int pvrdma_add_gid(const union ib_gid *gid, - const struct ib_gid_attr *attr, - void **context) +static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context) { struct pvrdma_dev *dev = to_vdev(attr->device); - return pvrdma_add_gid_at_index(dev, gid, + return pvrdma_add_gid_at_index(dev, &attr->gid, ib_gid_type_to_pvrdma(attr->gid_type), attr->index); } @@ -699,8 +692,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) } static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, + struct net_device *ndev, unsigned long event) { + struct pci_dev *pdev_net; + unsigned int slot; + switch (event) { case NETDEV_REBOOT: case NETDEV_DOWN: @@ -718,6 +715,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, else pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; + case NETDEV_UNREGISTER: + dev_put(dev->netdev); + dev->netdev = NULL; + break; + case NETDEV_REGISTER: + /* vmxnet3 will have same bus, slot. But func will be 0 */ + slot = PCI_SLOT(dev->pdev->devfn); + pdev_net = pci_get_slot(dev->pdev->bus, + PCI_DEVFN(slot, 0)); + if ((dev->netdev == NULL) && + (pci_get_drvdata(pdev_net) == ndev)) { + /* this is our netdev */ + dev->netdev = ndev; + dev_hold(ndev); + } + pci_dev_put(pdev_net); + break; + default: dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", event, dev->ib_dev.name); @@ -734,8 +749,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work) mutex_lock(&pvrdma_device_list_lock); list_for_each_entry(dev, &pvrdma_device_list, device_link) { - if (dev->netdev == netdev_work->event_netdev) { - pvrdma_netdevice_event_handle(dev, netdev_work->event); + if ((netdev_work->event == NETDEV_REGISTER) || + (dev->netdev == netdev_work->event_netdev)) { + pvrdma_netdevice_event_handle(dev, + netdev_work->event_netdev, + netdev_work->event); break; } } @@ -968,6 +986,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, ret = -ENODEV; goto err_free_cq_ring; } + dev_hold(dev->netdev); dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); @@ -1040,6 +1059,10 @@ err_free_intrs: pvrdma_free_irq(dev); pci_free_irq_vectors(pdev); err_free_cq_ring: + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); err_free_async_ring: pvrdma_page_dir_cleanup(dev, &dev->async_pdir); @@ -1079,6 +1102,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) flush_workqueue(event_wq); + if (dev->netdev) { + dev_put(dev->netdev); + dev->netdev = NULL; + } + /* Unregister ib device */ ib_unregister_device(&dev->ib_dev); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index eb5b1065ec08..60083c0363a5 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -599,7 +599,8 @@ static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n) qp->rq.offset + n * qp->rq.wqe_size); } -static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) +static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, + const struct ib_reg_wr *wr) { struct pvrdma_user_mr *mr = to_vmr(wr->mr); @@ -623,8 +624,8 @@ static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) * * @return: 0 on success, otherwise errno returned. */ -int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { struct pvrdma_qp *qp = to_vqp(ibqp); struct pvrdma_dev *dev = to_vdev(ibqp->device); @@ -827,8 +828,8 @@ out: * * @return: 0 on success, otherwise errno returned. */ -int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { struct pvrdma_dev *dev = to_vdev(ibqp->device); unsigned long flags; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index af235967a9c2..dc0ce877c7a3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -52,13 +52,6 @@ #include "pvrdma.h" -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) -{ - /* No support for kernel clients. */ - return -EOPNOTSUPP; -} - /** * pvrdma_query_srq - query shared receive queue * @ibsrq: the shared receive queue to query diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index a51463cd2f37..b65d10b0a875 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_qp = dev->dsr->caps.max_qp; props->max_qp_wr = dev->dsr->caps.max_qp_wr; props->device_cap_flags = dev->dsr->caps.device_cap_flags; - props->max_sge = dev->dsr->caps.max_sge; + props->max_send_sge = dev->dsr->caps.max_sge; + props->max_recv_sge = dev->dsr->caps.max_sge; props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, dev->dsr->caps.max_sge_rd); props->max_srq = dev->dsr->caps.max_srq; @@ -154,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port, props->gid_tbl_len = resp->attrs.gid_tbl_len; props->port_cap_flags = pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); - props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; props->max_msg_sz = resp->attrs.max_msg_sz; props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index b7b25728a7e5..b2e3ab50cb08 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -412,15 +412,10 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); -int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, - struct ib_udata *udata); struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); -int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, - struct ib_udata *udata); int pvrdma_destroy_cq(struct ib_cq *cq); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); @@ -435,8 +430,6 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int pvrdma_destroy_srq(struct ib_srq *srq); -int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, @@ -446,9 +439,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int pvrdma_destroy_qp(struct ib_qp *qp); -int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); #endif /* __PVRDMA_VERBS_H__ */ |