From dd302ee41e6ed204f8d9534d511edc72b5ce5e53 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 13 Apr 2020 16:23:23 +0300 Subject: RDMA/cma: Limit the scope of rdma_is_consumer_reject function The function is local to cma.c, so let's limit its scope. Link: https://lore.kernel.org/r/20200413132323.930869-1-leon@kernel.org Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_cm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 71f48cfdc24c..ea8e794785ed 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -389,14 +389,6 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr); */ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id, int reason); -/** - * rdma_is_consumer_reject - return true if the consumer rejected the connect - * request. - * @id: Communication identifier that received the REJECT event. - * @reason: Value returned in the REJECT event status field. - */ -bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason); - /** * rdma_consumer_reject_data - return the consumer reject private data and * length, if any. -- cgit v1.2.3 From fa5d010c5630b143b802e0477e87bba0656829cf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:42 +0300 Subject: RDMA: Group create AH arguments in struct Following patch adds additional argument to the create AH function, so it make sense to group ah_attr and flags arguments in struct. Link: https://lore.kernel.org/r/20200430192146.12863-13-maorg@mellanox.com Signed-off-by: Maor Gottlieb Acked-by: Devesh Sharma Acked-by: Gal Pressman Acked-by: Weihang Li Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 5 ++++- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 +++++--- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 +- drivers/infiniband/hw/efa/efa.h | 3 +-- drivers/infiniband/hw/efa/efa_verbs.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_ah.c | 5 +++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/mlx4/ah.c | 11 +++++++---- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx5/ah.c | 5 +++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 9 +++++---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 2 +- drivers/infiniband/hw/qedr/verbs.c | 4 ++-- drivers/infiniband/hw/qedr/verbs.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 5 +++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2 +- drivers/infiniband/sw/rdmavt/ah.c | 11 ++++++----- drivers/infiniband/sw/rdmavt/ah.h | 4 ++-- drivers/infiniband/sw/rxe/rxe_verbs.c | 9 +++++---- include/rdma/ib_verbs.h | 9 +++++++-- 22 files changed, 66 insertions(+), 47 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3bfadd8effcc..86be8a54a2d6 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -502,6 +502,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, u32 flags, struct ib_udata *udata) { + struct rdma_ah_init_attr init_attr = {}; struct ib_device *device = pd->device; struct ib_ah *ah; int ret; @@ -521,8 +522,10 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->pd = pd; ah->type = ah_attr->type; ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); + 
init_attr.ah_attr = ah_attr; + init_attr.flags = flags; - ret = device->ops.create_ah(ah, ah_attr, flags, udata); + ret = device->ops.create_ah(ah, &init_attr, udata); if (ret) { kfree(ah); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index d98348e82422..5a7c090204c5 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -631,11 +631,12 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) return nw_type; } -int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct ib_pd *ib_pd = ib_ah->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; const struct ib_gid_attr *sgid_attr; @@ -673,7 +674,8 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, - !(flags & RDMA_CREATE_AH_SLEEPABLE)); + !(init_attr->flags & + RDMA_CREATE_AH_SLEEPABLE)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate HW AH"); return rc; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 18dd46f46cf4..204c0849ba28 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -170,7 +170,7 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index aa7396a1588a..45d519edb4c3 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -153,8 +153,7 @@ int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 5c57098a4aee..454b01b21e6a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1639,10 +1639,10 @@ static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) } int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct efa_dev *dev = to_edev(ibah->device); struct efa_com_create_ah_params params = {}; struct efa_ibv_create_ah_resp resp = {}; @@ -1650,7 +1650,7 @@ int efa_create_ah(struct ib_ah *ibah, 
struct efa_ah *ah = to_eah(ibah); int err; - if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) { + if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Create address handle is not supported in atomic context\n"); err = -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 8a522e14ef62..5b2f9314edd3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,13 +39,14 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); u16 vlan_id = 0xffff; bool vlan_en = false; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ecbfeb6dbdd4..e1032cec2b12 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1208,8 +1208,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt, int rr); -int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); +int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 02a169f8027b..5f8f8d5c0ce0 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -141,10 +141,11 @@ static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) - +int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) return -EINVAL; @@ -167,12 +168,14 @@ int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag) { struct rdma_ah_attr slave_attr = *ah_attr; + struct rdma_ah_init_attr init_attr = {}; struct mlx4_ib_ah *mah = to_mah(ah); int ret; slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL); + init_attr.ah_attr = &slave_attr; + ret = mlx4_ib_create_ah(ah, &init_attr, NULL); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d188573187fa..182a237b87f7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -752,7 +752,7 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -int mlx4_ib_create_ah(struct ib_ah *ah, struct 
rdma_ah_attr *ah_attr, u32 flags, +int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 80642dd359bc..9b59348d51b5 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -68,10 +68,11 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, } } -int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct mlx5_ib_ah *ah = to_mah(ibah); struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_ah_attr_type ah_type = ah_attr->type; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index df375cb4efbb..7dffc87601eb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1153,7 +1153,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 69a3e4f62fb1..bc3e3d741ca3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -388,14 +388,15 @@ static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); } -static int mthca_ah_create(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +static int mthca_ah_create(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_ah *ah = to_mah(ibah); - return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), ah_attr, - ah); + return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), + init_attr->ah_attr, ah); } static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 2b7f00ac41b0..6eea02b18968 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -155,7 +155,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { u32 *ahid_addr; @@ -165,6 +165,7 @@ int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, u16 vlan_tag = 0xffff; const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd); + struct rdma_ah_attr *attr = init_attr->ah_attr; struct ocrdma_dev *dev = 
get_ocrdma_dev(ibah->device); if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 9780afcde780..8b73b3489f3a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -51,7 +51,7 @@ enum { OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a5bd3adaf90a..d6b94a713573 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2750,12 +2750,12 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { struct qedr_ah *ah = get_qedr_ah(ibah); - rdma_copy_ah_attr(&ah->attr, attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); return 0; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 18027844eb87..5e02387e068d 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,7 @@ int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index faf7ecd7b3fa..ccbded2d26ce 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -509,9 +509,10 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) * * @return: 0 on success, otherwise errno. 
*/ -int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct pvrdma_dev *dev = to_vdev(ibah->device); struct pvrdma_ah *ah = to_vah(ibah); const struct ib_global_route *grh; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index e4a48f5c0c85..267702226f10 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -414,7 +414,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); -int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index ee02c6176007..40480add7dd3 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -98,14 +98,14 @@ EXPORT_SYMBOL(rvt_check_ah); * * Return: 0 on success */ -int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata) +int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct rvt_ah *ah = ibah_to_rvtah(ibah); struct rvt_dev_info *dev = ib_to_rvt(ibah->device); unsigned long flags; - if (rvt_check_ah(ibah->device, ah_attr)) + if (rvt_check_ah(ibah->device, init_attr->ah_attr)) return -EINVAL; spin_lock_irqsave(&dev->n_ahs_lock, flags); @@ -117,10 +117,11 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, dev->n_ahs_allocated++; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); - rdma_copy_ah_attr(&ah->attr, ah_attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); if (dev->driver_f.notify_new_ah) - dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); + dev->driver_f.notify_new_ah(ibah->device, + init_attr->ah_attr, ah); return 0; } diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index bbb4d3bdec4e..40b7123fec76 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -50,8 +50,8 @@ #include -int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 create_flags, struct ib_udata *udata); +int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9dd4bd7aea92..b8a22af724e8 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -195,15 +195,16 @@ static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) rxe_drop_ref(pd); } -static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, - u32 flags, struct ib_udata *udata) +static int rxe_create_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr 
*init_attr, + struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); - err = rxe_av_chk_attr(rxe, attr); + err = rxe_av_chk_attr(rxe, init_attr->ah_attr); if (err) return err; @@ -211,7 +212,7 @@ static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, if (err) return err; - rxe_init_av(attr, &ah->av); + rxe_init_av(init_attr->ah_attr, &ah->av); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bbc5cfb57cd2..20ea26810349 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -880,6 +880,11 @@ struct ib_mr_status { */ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); +struct rdma_ah_init_attr { + struct rdma_ah_attr *ah_attr; + u32 flags; +}; + enum rdma_ah_attr_type { RDMA_AH_ATTR_TYPE_UNDEFINED, RDMA_AH_ATTR_TYPE_IB, @@ -2403,8 +2408,8 @@ struct ib_device_ops { void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); - int (*create_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); + int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); void (*destroy_ah)(struct ib_ah *ah, u32 flags); -- cgit v1.2.3 From bd3920eac133103f0d4aa5fc62290e6df9a6c6da Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:43 +0300 Subject: RDMA/core: Add LAG functionality Add support to get the RoCE LAG xmit slave by building skb of the RoCE packet and call to master_get_xmit_slave. If driver wants to get the slave assume all slaves are available, then need to set RDMA_LAG_FLAGS_HASH_ALL_SLAVES in flags. Link: https://lore.kernel.org/r/20200430192146.12863-14-maorg@mellanox.com Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/lag.c | 136 +++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 1 + include/rdma/lag.h | 23 +++++++ 4 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/lag.c create mode 100644 include/rdma/lag.h (limited to 'include/rdma') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d1b14887960e..870f0fcd54d5 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ nldev.o restrack.o counters.o ib_core_uverbs.o \ - trace.o + trace.o lag.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c new file mode 100644 index 000000000000..a29533626a7c --- /dev/null +++ b/drivers/infiniband/core/lag.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. 
+ */ + +#include +#include +#include + +static struct sk_buff *rdma_build_skb(struct ib_device *device, + struct net_device *netdev, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct ipv6hdr *ip6h; + struct sk_buff *skb; + struct ethhdr *eth; + struct iphdr *iph; + struct udphdr *uh; + u8 smac[ETH_ALEN]; + bool is_ipv4; + int hdr_len; + + is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw); + hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev); + hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr); + + skb = alloc_skb(hdr_len, flags); + if (!skb) + return NULL; + + skb->dev = netdev; + skb_reserve(skb, hdr_len); + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + uh->source = htons(0xC000); + uh->dest = htons(ROCE_V2_UDP_DPORT); + uh->len = htons(sizeof(struct udphdr)); + + if (is_ipv4) { + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + iph->frag_off = 0; + iph->version = 4; + iph->protocol = IPPROTO_UDP; + iph->ihl = 0x5; + iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct + iphdr)); + memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12, + sizeof(struct in_addr)); + memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12, + sizeof(struct in_addr)); + } else { + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ip6h = ipv6_hdr(skb); + ip6h->version = 6; + ip6h->nexthdr = IPPROTO_UDP; + memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label, + sizeof(*ip6h->flow_lbl)); + memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw, + sizeof(struct in6_addr)); + memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw, + sizeof(struct in6_addr)); + } + + skb_push(skb, sizeof(struct ethhdr)); + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + skb->protocol = eth->h_proto = htons(is_ipv4 ? 
ETH_P_IP : ETH_P_IPV6); + rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac); + memcpy(eth->h_source, smac, ETH_ALEN); + memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN); + + return skb; +} + +static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, + struct net_device *master, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct net_device *slave; + struct sk_buff *skb; + + skb = rdma_build_skb(device, master, ah_attr, flags); + if (!skb) + return ERR_PTR(-ENOMEM); + + rcu_read_lock(); + slave = netdev_get_xmit_slave(master, skb, + !!(device->lag_flags & + RDMA_LAG_FLAGS_HASH_ALL_SLAVES)); + if (slave) + dev_hold(slave); + rcu_read_unlock(); + kfree_skb(skb); + return slave; +} + +void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave) +{ + if (xmit_slave) + dev_put(xmit_slave); +} + +struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + gfp_t flags) +{ + struct net_device *slave = NULL; + struct net_device *master; + + if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && + ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)) + return NULL; + + rcu_read_lock(); + master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr); + if (IS_ERR(master)) { + rcu_read_unlock(); + return master; + } + dev_hold(master); + rcu_read_unlock(); + + if (!netif_is_bond_master(master)) + goto put; + + slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags); +put: + dev_put(master); + return slave; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 20ea26810349..e6c18ec0365a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2714,6 +2714,7 @@ struct ib_device { /* Used by iWarp CM */ char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; + u32 lag_flags; }; struct ib_client_nl_info; diff --git a/include/rdma/lag.h b/include/rdma/lag.h new file mode 100644 index 000000000000..7c06ec9b2eef --- /dev/null +++ b/include/rdma/lag.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_LAG_H_ +#define _RDMA_LAG_H_ + +#include + +struct ib_device; +struct rdma_ah_attr; + +enum rdma_lag_flags { + RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0 +}; + +void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave); +struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, + struct rdma_ah_attr *ah_attr, + gfp_t flags); + +#endif /* _RDMA_LAG_H_ */ -- cgit v1.2.3 From 51aab12631dd7700385d275846ca49dc0b8c2124 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:44 +0300 Subject: RDMA/core: Get xmit slave for LAG Add a call to rdma_lag_get_ah_roce_slave() when the address handle is created. Lower driver can use it to select the QP's affinity port. 
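
For illustration only (not part of this patch series): a minimal sketch of how a provider driver's .create_ah handler might consume the new xmit_slave member of struct rdma_ah_init_attr to pick a port affinity. struct drv_ah, to_drv_ah() and drv_slave_to_port() are hypothetical names, not existing kernel symbols; rdma_ah_get_port_num() and the init_attr fields are as introduced above.

/* Sketch: map the LAG xmit slave resolved by the core to a HW port. */
static int drv_create_ah(struct ib_ah *ibah,
			 struct rdma_ah_init_attr *init_attr,
			 struct ib_udata *udata)
{
	struct drv_ah *ah = to_drv_ah(ibah);	/* hypothetical container_of helper */

	if (init_attr->xmit_slave)
		/* The core already resolved the bond's xmit slave netdev. */
		ah->port = drv_slave_to_port(ibah->device,
					     init_attr->xmit_slave);	/* hypothetical */
	else
		ah->port = rdma_ah_get_port_num(init_attr->ah_attr);

	return 0;
}
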
Link: https://lore.kernel.org/r/20200430192146.12863-15-maorg@mellanox.com Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 61 +++++++++++++++++++++++++++++------------ include/rdma/ib_verbs.h | 2 ++ 2 files changed, 45 insertions(+), 18 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 86be8a54a2d6..bf0249f76ae9 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "core_priv.h" #include @@ -500,7 +501,8 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr, static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags, - struct ib_udata *udata) + struct ib_udata *udata, + struct net_device *xmit_slave) { struct rdma_ah_init_attr init_attr = {}; struct ib_device *device = pd->device; @@ -524,6 +526,7 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL); init_attr.ah_attr = ah_attr; init_attr.flags = flags; + init_attr.xmit_slave = xmit_slave; ret = device->ops.create_ah(ah, &init_attr, udata); if (ret) { @@ -550,15 +553,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, u32 flags) { const struct ib_gid_attr *old_sgid_attr; + struct net_device *slave; struct ib_ah *ah; int ret; ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr); if (ret) return ERR_PTR(ret); - - ah = _rdma_create_ah(pd, ah_attr, flags, NULL); - + slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr, + (flags & RDMA_CREATE_AH_SLEEPABLE) ? + GFP_KERNEL : GFP_ATOMIC); + if (IS_ERR(slave)) { + rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); + return (void *)slave; + } + ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave); + rdma_lag_put_ah_roce_slave(slave); rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); return ah; } @@ -597,7 +607,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd, } } - ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); + ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, + udata, NULL); out: rdma_unfill_sgid_attr(ah_attr, old_sgid_attr); @@ -1636,11 +1647,35 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, const struct ib_gid_attr *old_sgid_attr_alt_av; int ret; + attr->xmit_slave = NULL; if (attr_mask & IB_QP_AV) { ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr, &old_sgid_attr_av); if (ret) return ret; + + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && + is_qp_type_connected(qp)) { + struct net_device *slave; + + /* + * If the user provided the qp_attr then we have to + * resolve it. Kerne users have to provide already + * resolved rdma_ah_attr's. + */ + if (udata) { + ret = ib_resolve_eth_dmac(qp->device, + &attr->ah_attr); + if (ret) + goto out_av; + } + slave = rdma_lag_get_ah_roce_slave(qp->device, + &attr->ah_attr, + GFP_KERNEL); + if (IS_ERR(slave)) + goto out_av; + attr->xmit_slave = slave; + } } if (attr_mask & IB_QP_ALT_PATH) { /* @@ -1667,18 +1702,6 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, } } - /* - * If the user provided the qp_attr then we have to resolve it. 
Kernel - * users have to provide already resolved rdma_ah_attr's - */ - if (udata && (attr_mask & IB_QP_AV) && - attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE && - is_qp_type_connected(qp)) { - ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); - if (ret) - goto out; - } - if (rdma_ib_or_roce(qp->device, port)) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { dev_warn(&qp->device->dev, @@ -1720,8 +1743,10 @@ out: if (attr_mask & IB_QP_ALT_PATH) rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av); out_av: - if (attr_mask & IB_QP_AV) + if (attr_mask & IB_QP_AV) { + rdma_lag_put_ah_roce_slave(attr->xmit_slave); rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av); + } return ret; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6c18ec0365a..8d29f2f79da8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -883,6 +883,7 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct rdma_ah_init_attr { struct rdma_ah_attr *ah_attr; u32 flags; + struct net_device *xmit_slave; }; enum rdma_ah_attr_type { @@ -1272,6 +1273,7 @@ struct ib_qp_attr { u8 alt_port_num; u8 alt_timeout; u32 rate_limit; + struct net_device *xmit_slave; }; enum ib_wr_opcode { -- cgit v1.2.3 From 04c349a96506961b1b31e8d03e784fe3c5413e0b Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 13 Apr 2020 16:24:08 +0300 Subject: RDMA/mad: Remove snoop interface Snoop interface is not used. Remove it. Link: https://lore.kernel.org/r/20200413132408.931084-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/mad.c | 238 +----------------------------------------- include/rdma/ib_mad.h | 49 +-------- 2 files changed, 6 insertions(+), 281 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c54db13fa9b0..e02b5c4fdf09 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -85,7 +85,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); -/* Client ID 0 is used for snoop-only clients */ static DEFINE_XARRAY_ALLOC1(ib_mad_clients); static u32 ib_mad_client_next; static struct list_head ib_mad_port_list; @@ -483,141 +482,12 @@ error1: } EXPORT_SYMBOL(ib_register_mad_agent); -static inline int is_snooping_sends(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (/*IB_MAD_SNOOP_POSTED_SENDS | - IB_MAD_SNOOP_RMPP_SENDS |*/ - IB_MAD_SNOOP_SEND_COMPLETIONS /*| - IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); -} - -static inline int is_snooping_recvs(int mad_snoop_flags) -{ - return (mad_snoop_flags & - (IB_MAD_SNOOP_RECVS /*| - IB_MAD_SNOOP_RMPP_RECVS*/)); -} - -static int register_snoop_agent(struct ib_mad_qp_info *qp_info, - struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_snoop_private **new_snoop_table; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - /* Check for empty slot in array. */ - for (i = 0; i < qp_info->snoop_table_size; i++) - if (!qp_info->snoop_table[i]) - break; - - if (i == qp_info->snoop_table_size) { - /* Grow table. 
*/ - new_snoop_table = krealloc(qp_info->snoop_table, - sizeof mad_snoop_priv * - (qp_info->snoop_table_size + 1), - GFP_ATOMIC); - if (!new_snoop_table) { - i = -ENOMEM; - goto out; - } - - qp_info->snoop_table = new_snoop_table; - qp_info->snoop_table_size++; - } - qp_info->snoop_table[i] = mad_snoop_priv; - atomic_inc(&qp_info->snoop_count); -out: - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - return i; -} - -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context) -{ - struct ib_mad_port_private *port_priv; - struct ib_mad_agent *ret; - struct ib_mad_snoop_private *mad_snoop_priv; - int qpn; - int err; - - /* Validate parameters */ - if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) || - (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - qpn = get_spl_qp_index(qp_type); - if (qpn == -1) { - ret = ERR_PTR(-EINVAL); - goto error1; - } - port_priv = ib_get_mad_port(device, port_num); - if (!port_priv) { - ret = ERR_PTR(-ENODEV); - goto error1; - } - /* Allocate structures */ - mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); - if (!mad_snoop_priv) { - ret = ERR_PTR(-ENOMEM); - goto error1; - } - - /* Now, fill in the various structures */ - mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; - mad_snoop_priv->agent.device = device; - mad_snoop_priv->agent.recv_handler = recv_handler; - mad_snoop_priv->agent.snoop_handler = snoop_handler; - mad_snoop_priv->agent.context = context; - mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; - mad_snoop_priv->agent.port_num = port_num; - mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; - init_completion(&mad_snoop_priv->comp); - - err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type); - if (err) { - ret = ERR_PTR(err); - goto error2; - } - - mad_snoop_priv->snoop_index = register_snoop_agent( - &port_priv->qp_info[qpn], - mad_snoop_priv); - if (mad_snoop_priv->snoop_index < 0) { - ret = ERR_PTR(mad_snoop_priv->snoop_index); - goto error3; - } - - atomic_set(&mad_snoop_priv->refcount, 1); - return &mad_snoop_priv->agent; -error3: - ib_mad_agent_security_cleanup(&mad_snoop_priv->agent); -error2: - kfree(mad_snoop_priv); -error1: - return ret; -} -EXPORT_SYMBOL(ib_register_mad_snoop); - static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { if (atomic_dec_and_test(&mad_agent_priv->refcount)) complete(&mad_agent_priv->comp); } -static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) -{ - if (atomic_dec_and_test(&mad_snoop_priv->refcount)) - complete(&mad_snoop_priv->comp); -} - static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; @@ -650,25 +520,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) kfree_rcu(mad_agent_priv, rcu); } -static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) -{ - struct ib_mad_qp_info *qp_info; - unsigned long flags; - - qp_info = mad_snoop_priv->qp_info; - spin_lock_irqsave(&qp_info->snoop_lock, flags); - qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; - atomic_dec(&qp_info->snoop_count); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - - deref_snoop_agent(mad_snoop_priv); - wait_for_completion(&mad_snoop_priv->comp); - - 
ib_mad_agent_security_cleanup(&mad_snoop_priv->agent); - - kfree(mad_snoop_priv); -} - /* * ib_unregister_mad_agent - Unregisters a client from using MAD services * @@ -677,20 +528,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { struct ib_mad_agent_private *mad_agent_priv; - struct ib_mad_snoop_private *mad_snoop_priv; - - /* If the TID is zero, the agent can only snoop. */ - if (mad_agent->hi_tid) { - mad_agent_priv = container_of(mad_agent, - struct ib_mad_agent_private, - agent); - unregister_mad_agent(mad_agent_priv); - } else { - mad_snoop_priv = container_of(mad_agent, - struct ib_mad_snoop_private, - agent); - unregister_mad_snoop(mad_snoop_priv); - } + + mad_agent_priv = container_of(mad_agent, + struct ib_mad_agent_private, + agent); + unregister_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_unregister_mad_agent); @@ -706,57 +548,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list) spin_unlock_irqrestore(&mad_queue->lock, flags); } -static void snoop_send(struct ib_mad_qp_info *qp_info, - struct ib_mad_send_buf *send_buf, - struct ib_mad_send_wc *mad_send_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, - send_buf, mad_send_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - -static void snoop_recv(struct ib_mad_qp_info *qp_info, - struct ib_mad_recv_wc *mad_recv_wc, - int mad_snoop_flags) -{ - struct ib_mad_snoop_private *mad_snoop_priv; - unsigned long flags; - int i; - - spin_lock_irqsave(&qp_info->snoop_lock, flags); - for (i = 0; i < qp_info->snoop_table_size; i++) { - mad_snoop_priv = qp_info->snoop_table[i]; - if (!mad_snoop_priv || - !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) - continue; - - atomic_inc(&mad_snoop_priv->refcount); - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); - mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL, - mad_recv_wc); - deref_snoop_agent(mad_snoop_priv); - spin_lock_irqsave(&qp_info->snoop_lock, flags); - } - spin_unlock_irqrestore(&qp_info->snoop_lock, flags); -} - static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, u16 pkey_index, u8 port_num, struct ib_wc *wc) { @@ -2289,9 +2080,6 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; - if (atomic_read(&qp_info->snoop_count)) - snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); - /* Validate MAD */ if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; @@ -2538,9 +2326,6 @@ retry: mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_send_wc.status = wc->status; mad_send_wc.vendor_err = wc->vendor_err; - if (atomic_read(&qp_info->snoop_count)) - snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, - IB_MAD_SNOOP_SEND_COMPLETIONS); ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { @@ -2782,10 
+2567,6 @@ static void local_completions(struct work_struct *work) local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = (struct ib_mad *)local->mad_priv->mad; - if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) - snoop_recv(recv_mad_agent->qp_info, - &local->mad_priv->header.recv_wc, - IB_MAD_SNOOP_RECVS); recv_mad_agent->agent.recv_handler( &recv_mad_agent->agent, &local->mad_send_wr->send_buf, @@ -2800,10 +2581,6 @@ local_send_completion: mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &local->mad_send_wr->send_buf; - if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) - snoop_send(mad_agent_priv->qp_info, - &local->mad_send_wr->send_buf, - &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); @@ -3119,10 +2896,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv, init_mad_queue(qp_info, &qp_info->send_queue); init_mad_queue(qp_info, &qp_info->recv_queue); INIT_LIST_HEAD(&qp_info->overflow_list); - spin_lock_init(&qp_info->snoop_lock); - qp_info->snoop_table = NULL; - qp_info->snoop_table_size = 0; - atomic_set(&qp_info->snoop_count, 0); } static int create_mad_qp(struct ib_mad_qp_info *qp_info, @@ -3166,7 +2939,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) return; ib_destroy_qp(qp_info->qp); - kfree(qp_info->snoop_table); } /* diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 4e62650e2127..8c093fc1bb9f 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -558,20 +558,6 @@ struct ib_mad_recv_wc; typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc); -/** - * ib_mad_snoop_handler - Callback handler for snooping sent MADs. - * @mad_agent: MAD agent that snooped the MAD. - * @send_buf: send MAD data buffer. - * @mad_send_wc: Work completion information on the sent MAD. Valid - * only for snooping that occurs on a send completion. - * - * Clients snooping MADs should not modify data referenced by the @send_buf - * or @mad_send_wc. - */ -typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, - struct ib_mad_send_wc *mad_send_wc); - /** * ib_mad_recv_handler - callback handler for a received MAD. * @mad_agent: MAD agent requesting the received MAD. @@ -581,8 +567,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, * MADs received in response to a send request operation will be handed to * the user before the send operation completes. All data buffers given * to registered agents through this routine are owned by the receiving - * client, except for snooping agents. Clients snooping MADs should not - * modify the data referenced by @mad_recv_wc. + * client. */ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, @@ -595,7 +580,6 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, * @mr: Memory region for system memory usable for DMA. * @recv_handler: Callback handler for a received MAD. * @send_handler: Callback handler for a sent MAD. - * @snoop_handler: Callback handler for snooped sent MADs. * @context: User-specified context associated with this registration. * @hi_tid: Access layer assigned transaction ID for this client. 
* Unsolicited MADs sent by this client will have the upper 32-bits @@ -612,7 +596,6 @@ struct ib_mad_agent { struct ib_qp *qp; ib_mad_recv_handler recv_handler; ib_mad_send_handler send_handler; - ib_mad_snoop_handler snoop_handler; void *context; u32 hi_tid; u32 flags; @@ -720,36 +703,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, ib_mad_recv_handler recv_handler, void *context, u32 registration_flags); - -enum ib_mad_snoop_flags { - /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ - /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ - IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), - /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ - IB_MAD_SNOOP_RECVS = (1<<4) - /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ - /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ -}; - -/** - * ib_register_mad_snoop - Register to snoop sent and received MADs. - * @device: The device to register with. - * @port_num: The port on the specified device to use. - * @qp_type: Specifies which QP traffic to snoop. Must be either - * IB_QPT_SMI or IB_QPT_GSI. - * @mad_snoop_flags: Specifies information where snooping occurs. - * @send_handler: The callback routine invoked for a snooped send. - * @recv_handler: The callback routine invoked for a snooped receive. - * @context: User specified context associated with the registration. - */ -struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, - u8 port_num, - enum ib_qp_type qp_type, - int mad_snoop_flags, - ib_mad_snoop_handler snoop_handler, - ib_mad_recv_handler recv_handler, - void *context); - /** * ib_unregister_mad_agent - Unregisters a client from using MAD services. * @mad_agent: Corresponding MAD registration request to deregister. -- cgit v1.2.3 From 11a0ae4c4bff9b2a471b54dbe910fc0f60e58e62 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 21 Apr 2020 20:24:40 +0300 Subject: RDMA: Allow ib_client's to fail when add() is called When a client is added it isn't allowed to fail, but all the client's have various failure paths within their add routines. This creates the very fringe condition where the client was added, failed during add and didn't set the client_data. The core code will then still call other client_data centric ops like remove(), rename(), get_nl_info(), and get_net_dev_by_params() with NULL client_data - which is confusing and unexpected. If the add() callback fails, then do not call any more client ops for the device, even remove. Remove all the now redundant checks for NULL client_data in ops callbacks. Update all the add() callbacks to return error codes appropriately. EOPNOTSUPP is used for cases where the ULP does not support the ib_device - eg because it only works with IB. 
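
For illustration only (not part of this patch): a minimal sketch of an ib_client written to the new convention, where add() returns an errno such as -EOPNOTSUPP for unsupported devices and remove() may rely on a valid client_data because the core no longer calls any ops after a failed add(). The "example_" names and struct example_dev are hypothetical; rdma_cap_ib_mad(), rdma_start_port() and ib_set_client_data() are existing core helpers.

static struct ib_client example_client;	/* forward (tentative) definition */

struct example_dev {
	struct ib_device *device;
};

static int example_add_one(struct ib_device *device)
{
	struct example_dev *edev;

	/* Bail out early when this ULP cannot use the device at all. */
	if (!rdma_cap_ib_mad(device, rdma_start_port(device)))
		return -EOPNOTSUPP;

	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
	if (!edev)
		return -ENOMEM;

	edev->device = device;
	ib_set_client_data(device, &example_client, edev);
	return 0;
}

static void example_remove_one(struct ib_device *device, void *client_data)
{
	/* Never reached with NULL client_data: a failed add() suppresses
	 * remove() and every other client op for this device.
	 */
	kfree(client_data);
}

static struct ib_client example_client = {
	.name   = "example",
	.add    = example_add_one,
	.remove = example_remove_one,
};
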
Link: https://lore.kernel.org/r/20200421172440.387069-1-leon@kernel.org Signed-off-by: Leon Romanovsky Acked-by: Ursula Braun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 24 ++++++++++++++---------- drivers/infiniband/core/cma.c | 23 ++++++++++++----------- drivers/infiniband/core/device.c | 16 ++++++++++++++-- drivers/infiniband/core/mad.c | 17 +++++++++++++---- drivers/infiniband/core/multicast.c | 12 +++++------- drivers/infiniband/core/sa_query.c | 22 ++++++++++++---------- drivers/infiniband/core/user_mad.c | 22 ++++++++++++---------- drivers/infiniband/core/uverbs_main.c | 24 ++++++++++++------------ drivers/infiniband/ulp/ipoib/ipoib_main.c | 15 +++++---------- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 12 +++++------- drivers/infiniband/ulp/srp/ib_srp.c | 21 ++++++++++----------- drivers/infiniband/ulp/srpt/ib_srpt.c | 25 ++++++++++--------------- include/rdma/ib_verbs.h | 2 +- net/rds/ib.c | 21 +++++++++++++-------- net/smc/smc_ib.c | 10 ++++------ 15 files changed, 142 insertions(+), 124 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4794113ecd59..68e1a9bba027 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -81,7 +81,7 @@ const char *__attribute_const__ ibcm_reject_msg(int reason) EXPORT_SYMBOL(ibcm_reject_msg); struct cm_id_private; -static void cm_add_one(struct ib_device *device); +static int cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param); @@ -4382,7 +4382,7 @@ static void cm_remove_port_fs(struct cm_port *port) } -static void cm_add_one(struct ib_device *ib_device) +static int cm_add_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; @@ -4401,7 +4401,7 @@ static void cm_add_one(struct ib_device *ib_device) cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), GFP_KERNEL); if (!cm_dev) - return; + return -ENOMEM; cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; @@ -4413,8 +4413,10 @@ static void cm_add_one(struct ib_device *ib_device) continue; port = kzalloc(sizeof *port, GFP_KERNEL); - if (!port) + if (!port) { + ret = -ENOMEM; goto error1; + } cm_dev->port[i-1] = port; port->cm_dev = cm_dev; @@ -4435,8 +4437,10 @@ static void cm_add_one(struct ib_device *ib_device) cm_recv_handler, port, 0); - if (IS_ERR(port->mad_agent)) + if (IS_ERR(port->mad_agent)) { + ret = PTR_ERR(port->mad_agent); goto error2; + } ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) @@ -4445,15 +4449,17 @@ static void cm_add_one(struct ib_device *ib_device) count++; } - if (!count) + if (!count) { + ret = -EOPNOTSUPP; goto free; + } ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); list_add_tail(&cm_dev->list, &cm.device_list); write_unlock_irqrestore(&cm.device_lock, flags); - return; + return 0; error3: ib_unregister_mad_agent(port->mad_agent); @@ -4475,6 +4481,7 @@ error1: } free: kfree(cm_dev); + return ret; } static void cm_remove_one(struct ib_device *ib_device, void *client_data) @@ -4489,9 +4496,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) unsigned long flags; int i; - if (!cm_dev) - return; - write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); diff --git 
a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 6406a597dfb6..e8d99b71f44a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -153,7 +153,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res) } EXPORT_SYMBOL(rdma_res_to_id); -static void cma_add_one(struct ib_device *device); +static int cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device, void *client_data); static struct ib_client cma_client = { @@ -4638,29 +4638,34 @@ static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; -static void cma_add_one(struct ib_device *device) +static int cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; struct rdma_id_private *id_priv; unsigned int i; unsigned long supported_gids = 0; + int ret; cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); if (!cma_dev) - return; + return -ENOMEM; cma_dev->device = device; cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, sizeof(*cma_dev->default_gid_type), GFP_KERNEL); - if (!cma_dev->default_gid_type) + if (!cma_dev->default_gid_type) { + ret = -ENOMEM; goto free_cma_dev; + } cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt, sizeof(*cma_dev->default_roce_tos), GFP_KERNEL); - if (!cma_dev->default_roce_tos) + if (!cma_dev->default_roce_tos) { + ret = -ENOMEM; goto free_gid_type; + } rdma_for_each_port (device, i) { supported_gids = roce_gid_type_mask_support(device, i); @@ -4686,15 +4691,14 @@ static void cma_add_one(struct ib_device *device) mutex_unlock(&lock); trace_cm_add_one(device); - return; + return 0; free_gid_type: kfree(cma_dev->default_gid_type); free_cma_dev: kfree(cma_dev); - - return; + return ret; } static int cma_remove_id_dev(struct rdma_id_private *id_priv) @@ -4756,9 +4760,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data) trace_cm_remove_one(device); - if (!cma_dev) - return; - mutex_lock(&lock); list_del(&cma_dev->list); mutex_unlock(&lock); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d0b3d35ad3e4..d9f565a779df 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -677,8 +677,20 @@ static int add_client_context(struct ib_device *device, if (ret) goto out; downgrade_write(&device->client_data_rwsem); - if (client->add) - client->add(device); + if (client->add) { + if (client->add(device)) { + /* + * If a client fails to add then the error code is + * ignored, but we won't call any more ops on this + * client. 
+ */ + xa_erase(&device->client_data, client->client_id); + up_read(&device->client_data_rwsem); + ib_device_put(device); + ib_client_put(client); + return 0; + } + } /* Readers shall not see a client until add has been completed */ xa_set_mark(&device->client_data, client->client_id, diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index e02b5c4fdf09..186e0d652e8b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3076,9 +3076,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) return 0; } -static void ib_mad_init_device(struct ib_device *device) +static int ib_mad_init_device(struct ib_device *device) { int start, i; + unsigned int count = 0; + int ret; start = rdma_start_port(device); @@ -3086,17 +3088,23 @@ static void ib_mad_init_device(struct ib_device *device) if (!rdma_cap_ib_mad(device, i)) continue; - if (ib_mad_port_open(device, i)) { + ret = ib_mad_port_open(device, i); + if (ret) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } - if (ib_agent_port_open(device, i)) { + ret = ib_agent_port_open(device, i); + if (ret) { dev_err(&device->dev, "Couldn't open port %d for agents\n", i); goto error_agent; } + count++; } - return; + if (!count) + return -EOPNOTSUPP; + + return 0; error_agent: if (ib_mad_port_close(device, i)) @@ -3113,6 +3121,7 @@ error: if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); } + return ret; } static void ib_mad_remove_device(struct ib_device *device, void *client_data) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 9c2d8b7f1af9..740f03ecc05d 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -42,7 +42,7 @@ #include #include "sa.h" -static void mcast_add_one(struct ib_device *device); +static int mcast_add_one(struct ib_device *device); static void mcast_remove_one(struct ib_device *device, void *client_data); static struct ib_client mcast_client = { @@ -815,7 +815,7 @@ static void mcast_event_handler(struct ib_event_handler *handler, } } -static void mcast_add_one(struct ib_device *device) +static int mcast_add_one(struct ib_device *device) { struct mcast_device *dev; struct mcast_port *port; @@ -825,7 +825,7 @@ static void mcast_add_one(struct ib_device *device) dev = kmalloc(struct_size(dev, port, device->phys_port_cnt), GFP_KERNEL); if (!dev) - return; + return -ENOMEM; dev->start_port = rdma_start_port(device); dev->end_port = rdma_end_port(device); @@ -845,7 +845,7 @@ static void mcast_add_one(struct ib_device *device) if (!count) { kfree(dev); - return; + return -EOPNOTSUPP; } dev->device = device; @@ -853,6 +853,7 @@ static void mcast_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); ib_register_event_handler(&dev->event_handler); + return 0; } static void mcast_remove_one(struct ib_device *device, void *client_data) @@ -861,9 +862,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data) struct mcast_port *port; int i; - if (!dev) - return; - ib_unregister_event_handler(&dev->event_handler); flush_workqueue(mcast_wq); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 2dd326f2beed..5c878646ff62 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -174,7 +174,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = { }; -static void ib_sa_add_one(struct 
ib_device *device); +static int ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device, void *client_data); static struct ib_client sa_client = { @@ -2322,18 +2322,19 @@ static void ib_sa_event(struct ib_event_handler *handler, } } -static void ib_sa_add_one(struct ib_device *device) +static int ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; int count = 0; + int ret; s = rdma_start_port(device); e = rdma_end_port(device); sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL); if (!sa_dev) - return; + return -ENOMEM; sa_dev->start_port = s; sa_dev->end_port = e; @@ -2353,8 +2354,10 @@ static void ib_sa_add_one(struct ib_device *device) ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, recv_handler, sa_dev, 0); - if (IS_ERR(sa_dev->port[i].agent)) + if (IS_ERR(sa_dev->port[i].agent)) { + ret = PTR_ERR(sa_dev->port[i].agent); goto err; + } INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work, @@ -2363,8 +2366,10 @@ static void ib_sa_add_one(struct ib_device *device) count++; } - if (!count) + if (!count) { + ret = -EOPNOTSUPP; goto free; + } ib_set_client_data(device, &sa_client, sa_dev); @@ -2383,7 +2388,7 @@ static void ib_sa_add_one(struct ib_device *device) update_sm_ah(&sa_dev->port[i].update_task); } - return; + return 0; err: while (--i >= 0) { @@ -2392,7 +2397,7 @@ err: } free: kfree(sa_dev); - return; + return ret; } static void ib_sa_remove_one(struct ib_device *device, void *client_data) @@ -2400,9 +2405,6 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data) struct ib_sa_device *sa_dev = client_data; int i; - if (!sa_dev) - return; - ib_unregister_event_handler(&sa_dev->event_handler); flush_workqueue(ib_wq); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index da229eab5903..b0d0b522cc76 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -142,7 +142,7 @@ static dev_t dynamic_issm_dev; static DEFINE_IDA(umad_ida); -static void ib_umad_add_one(struct ib_device *device); +static int ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device, void *client_data); static void ib_umad_dev_free(struct kref *kref) @@ -1352,37 +1352,41 @@ static void ib_umad_kill_port(struct ib_umad_port *port) put_device(&port->dev); } -static void ib_umad_add_one(struct ib_device *device) +static int ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; int count = 0; + int ret; s = rdma_start_port(device); e = rdma_end_port(device); umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL); if (!umad_dev) - return; + return -ENOMEM; kref_init(&umad_dev->kref); for (i = s; i <= e; ++i) { if (!rdma_cap_ib_mad(device, i)) continue; - if (ib_umad_init_port(device, i, umad_dev, - &umad_dev->ports[i - s])) + ret = ib_umad_init_port(device, i, umad_dev, + &umad_dev->ports[i - s]); + if (ret) goto err; count++; } - if (!count) + if (!count) { + ret = -EOPNOTSUPP; goto free; + } ib_set_client_data(device, &umad_client, umad_dev); - return; + return 0; err: while (--i >= s) { @@ -1394,6 +1398,7 @@ err: free: /* balances kref_init */ ib_umad_dev_put(umad_dev); + return ret; } static void ib_umad_remove_one(struct ib_device *device, void *client_data) @@ -1401,9 +1406,6 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data) struct ib_umad_device 
*umad_dev = client_data; unsigned int i; - if (!umad_dev) - return; - rdma_for_each_port (device, i) { if (rdma_cap_ib_mad(device, i)) ib_umad_kill_port( diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 8710a3427146..d52eb870533b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -75,7 +75,7 @@ static dev_t dynamic_uverbs_dev; static struct class *uverbs_class; static DEFINE_IDA(uverbs_ida); -static void ib_uverbs_add_one(struct ib_device *device); +static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); /* @@ -1091,7 +1091,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device, return 0; } -static void ib_uverbs_add_one(struct ib_device *device) +static int ib_uverbs_add_one(struct ib_device *device) { int devnum; dev_t base; @@ -1099,16 +1099,16 @@ static void ib_uverbs_add_one(struct ib_device *device) int ret; if (!device->ops.alloc_ucontext) - return; + return -EOPNOTSUPP; uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL); if (!uverbs_dev) - return; + return -ENOMEM; ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); if (ret) { kfree(uverbs_dev); - return; + return -ENOMEM; } device_initialize(&uverbs_dev->dev); @@ -1128,15 +1128,18 @@ static void ib_uverbs_add_one(struct ib_device *device) devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1, GFP_KERNEL); - if (devnum < 0) + if (devnum < 0) { + ret = -ENOMEM; goto err; + } uverbs_dev->devnum = devnum; if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; else base = IB_UVERBS_BASE_DEV + devnum; - if (ib_uverbs_create_uapi(device, uverbs_dev)) + ret = ib_uverbs_create_uapi(device, uverbs_dev); + if (ret) goto err_uapi; uverbs_dev->dev.devt = base; @@ -1151,7 +1154,7 @@ static void ib_uverbs_add_one(struct ib_device *device) goto err_uapi; ib_set_client_data(device, &uverbs_client, uverbs_dev); - return; + return 0; err_uapi: ida_free(&uverbs_ida, devnum); @@ -1160,7 +1163,7 @@ err: ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); put_device(&uverbs_dev->dev); - return; + return ret; } static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, @@ -1203,9 +1206,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) struct ib_uverbs_device *uverbs_dev = client_data; int wait_clients = 1; - if (!uverbs_dev) - return; - cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev); ida_free(&uverbs_ida, uverbs_dev->devnum); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 81b8227214f1..d4c6a97ce4c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -86,7 +86,7 @@ struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; -static void ipoib_add_one(struct ib_device *device); +static int ipoib_add_one(struct ib_device *device); static void ipoib_remove_one(struct ib_device *device, void *client_data); static void ipoib_neigh_reclaim(struct rcu_head *rp); static struct net_device *ipoib_get_net_dev_by_params( @@ -479,9 +479,6 @@ static struct net_device *ipoib_get_net_dev_by_params( if (ret) return NULL; - if (!dev_list) - return NULL; - /* See if we can find a unique device matching the L2 parameters */ matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, gid, NULL, &net_dev); @@ -2514,7 
+2511,7 @@ sysfs_failed: return ERR_PTR(-ENOMEM); } -static void ipoib_add_one(struct ib_device *device) +static int ipoib_add_one(struct ib_device *device) { struct list_head *dev_list; struct net_device *dev; @@ -2524,7 +2521,7 @@ static void ipoib_add_one(struct ib_device *device) dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); if (!dev_list) - return; + return -ENOMEM; INIT_LIST_HEAD(dev_list); @@ -2541,10 +2538,11 @@ static void ipoib_add_one(struct ib_device *device) if (!count) { kfree(dev_list); - return; + return -EOPNOTSUPP; } ib_set_client_data(device, &ipoib_client, dev_list); + return 0; } static void ipoib_remove_one(struct ib_device *device, void *client_data) @@ -2552,9 +2550,6 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv; struct list_head *dev_list = client_data; - if (!dev_list) - return; - list_for_each_entry_safe(priv, tmp, dev_list, list) { LIST_HEAD(head); ipoib_parent_unregister_pre(priv->dev); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 6e8d650c17c7..874a8eb7638c 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -113,7 +113,7 @@ struct opa_vnic_vema_port { struct mutex lock; }; -static void opa_vnic_vema_add_one(struct ib_device *device); +static int opa_vnic_vema_add_one(struct ib_device *device); static void opa_vnic_vema_rem_one(struct ib_device *device, void *client_data); @@ -989,18 +989,18 @@ static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en) * * Allocate the vnic control port and initialize it. */ -static void opa_vnic_vema_add_one(struct ib_device *device) +static int opa_vnic_vema_add_one(struct ib_device *device) { struct opa_vnic_ctrl_port *cport; int rc, size = sizeof(*cport); if (!rdma_cap_opa_vnic(device)) - return; + return -EOPNOTSUPP; size += device->phys_port_cnt * sizeof(struct opa_vnic_vema_port); cport = kzalloc(size, GFP_KERNEL); if (!cport) - return; + return -ENOMEM; cport->num_ports = device->phys_port_cnt; cport->ibdev = device; @@ -1012,6 +1012,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device) ib_set_client_data(device, &opa_vnic_client, cport); opa_vnic_ctrl_config_dev(cport, true); + return 0; } /** @@ -1026,9 +1027,6 @@ static void opa_vnic_vema_rem_one(struct ib_device *device, { struct opa_vnic_ctrl_port *cport = client_data; - if (!cport) - return; - c_info("removing VNIC client\n"); opa_vnic_ctrl_config_dev(cport, false); vema_unregister(cport); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index cd1181c39ed2..00b4f88b113e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -146,7 +146,7 @@ module_param(ch_count, uint, 0444); MODULE_PARM_DESC(ch_count, "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. 
The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); -static void srp_add_one(struct ib_device *device); +static int srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device, void *client_data); static void srp_rename_dev(struct ib_device *device, void *client_data); static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); @@ -4132,7 +4132,7 @@ static void srp_rename_dev(struct ib_device *device, void *client_data) } } -static void srp_add_one(struct ib_device *device) +static int srp_add_one(struct ib_device *device) { struct srp_device *srp_dev; struct ib_device_attr *attr = &device->attrs; @@ -4144,7 +4144,7 @@ static void srp_add_one(struct ib_device *device) srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); if (!srp_dev) - return; + return -ENOMEM; /* * Use the smallest page size supported by the HCA, down to a @@ -4197,8 +4197,12 @@ static void srp_add_one(struct ib_device *device) srp_dev->dev = device; srp_dev->pd = ib_alloc_pd(device, flags); - if (IS_ERR(srp_dev->pd)) - goto free_dev; + if (IS_ERR(srp_dev->pd)) { + int ret = PTR_ERR(srp_dev->pd); + + kfree(srp_dev); + return ret; + } if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey; @@ -4212,10 +4216,7 @@ static void srp_add_one(struct ib_device *device) } ib_set_client_data(device, &srp_client, srp_dev); - return; - -free_dev: - kfree(srp_dev); + return 0; } static void srp_remove_one(struct ib_device *device, void *client_data) @@ -4225,8 +4226,6 @@ static void srp_remove_one(struct ib_device *device, void *client_data) struct srp_target_port *target; srp_dev = client_data; - if (!srp_dev) - return; list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { device_unregister(&host->dev); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9d02d8088f1c..7ed38d1cb997 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3101,7 +3101,7 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) * srpt_add_one - InfiniBand device addition callback function * @device: Describes a HCA. 
*/ -static void srpt_add_one(struct ib_device *device) +static int srpt_add_one(struct ib_device *device) { struct srpt_device *sdev; struct srpt_port *sport; @@ -3112,14 +3112,16 @@ static void srpt_add_one(struct ib_device *device) sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt), GFP_KERNEL); if (!sdev) - goto err; + return -ENOMEM; sdev->device = device; mutex_init(&sdev->sdev_mutex); sdev->pd = ib_alloc_pd(device, 0); - if (IS_ERR(sdev->pd)) + if (IS_ERR(sdev->pd)) { + ret = PTR_ERR(sdev->pd); goto free_dev; + } sdev->lkey = sdev->pd->local_dma_lkey; @@ -3135,6 +3137,7 @@ static void srpt_add_one(struct ib_device *device) if (IS_ERR(sdev->cm_id)) { pr_info("ib_create_cm_id() failed: %ld\n", PTR_ERR(sdev->cm_id)); + ret = PTR_ERR(sdev->cm_id); sdev->cm_id = NULL; if (!rdma_cm_id) goto err_ring; @@ -3179,7 +3182,8 @@ static void srpt_add_one(struct ib_device *device) mutex_init(&sport->port_gid_id.mutex); INIT_LIST_HEAD(&sport->port_gid_id.tpg_list); - if (srpt_refresh_port(sport)) { + ret = srpt_refresh_port(sport); + if (ret) { pr_err("MAD registration failed for %s-%d.\n", dev_name(&sdev->device->dev), i); goto err_event; @@ -3190,10 +3194,9 @@ static void srpt_add_one(struct ib_device *device) list_add_tail(&sdev->list, &srpt_dev_list); spin_unlock(&srpt_dev_lock); -out: ib_set_client_data(device, &srpt_client, sdev); pr_debug("added %s.\n", dev_name(&device->dev)); - return; + return 0; err_event: ib_unregister_event_handler(&sdev->event_handler); @@ -3205,10 +3208,8 @@ err_ring: ib_dealloc_pd(sdev->pd); free_dev: kfree(sdev); -err: - sdev = NULL; pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev)); - goto out; + return ret; } /** @@ -3221,12 +3222,6 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) struct srpt_device *sdev = client_data; int i; - if (!sdev) { - pr_info("%s(%s): nothing to do.\n", __func__, - dev_name(&device->dev)); - return; - } - srpt_unregister_mad_agent(sdev); ib_unregister_event_handler(&sdev->event_handler); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8d29f2f79da8..c3d715e2fc66 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2722,7 +2722,7 @@ struct ib_device { struct ib_client_nl_info; struct ib_client { const char *name; - void (*add) (struct ib_device *); + int (*add)(struct ib_device *ibdev); void (*remove)(struct ib_device *, void *client_data); void (*rename)(struct ib_device *dev, void *client_data); int (*get_nl_info)(struct ib_device *ibdev, void *client_data, diff --git a/net/rds/ib.c b/net/rds/ib.c index a792d8a3872a..90212ed3edf1 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -127,19 +127,20 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) queue_work(rds_wq, &rds_ibdev->free_work); } -static void rds_ib_add_one(struct ib_device *device) +static int rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; bool has_fr, has_fmr; + int ret; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) - return; + return -EOPNOTSUPP; rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, ibdev_to_node(device)); if (!rds_ibdev) - return; + return -ENOMEM; spin_lock_init(&rds_ibdev->spinlock); refcount_set(&rds_ibdev->refcount, 1); @@ -182,12 +183,14 @@ static void rds_ib_add_one(struct ib_device *device) if (!rds_ibdev->vector_load) { pr_err("RDS/IB: %s failed to allocate vector memory\n", __func__); + ret = -ENOMEM; goto put_dev; } rds_ibdev->dev = device; rds_ibdev->pd = ib_alloc_pd(device, 0); if 
(IS_ERR(rds_ibdev->pd)) { + ret = PTR_ERR(rds_ibdev->pd); rds_ibdev->pd = NULL; goto put_dev; } @@ -195,12 +198,15 @@ static void rds_ib_add_one(struct ib_device *device) device->dma_device, sizeof(struct rds_header), L1_CACHE_BYTES, 0); - if (!rds_ibdev->rid_hdrs_pool) + if (!rds_ibdev->rid_hdrs_pool) { + ret = -ENOMEM; goto put_dev; + } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); if (IS_ERR(rds_ibdev->mr_1m_pool)) { + ret = PTR_ERR(rds_ibdev->mr_1m_pool); rds_ibdev->mr_1m_pool = NULL; goto put_dev; } @@ -208,6 +214,7 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->mr_8k_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); if (IS_ERR(rds_ibdev->mr_8k_pool)) { + ret = PTR_ERR(rds_ibdev->mr_8k_pool); rds_ibdev->mr_8k_pool = NULL; goto put_dev; } @@ -227,12 +234,13 @@ static void rds_ib_add_one(struct ib_device *device) refcount_inc(&rds_ibdev->refcount); ib_set_client_data(device, &rds_ib_client, rds_ibdev); - refcount_inc(&rds_ibdev->refcount); rds_ib_nodev_connect(); + return 0; put_dev: rds_ib_dev_put(rds_ibdev); + return ret; } /* @@ -274,9 +282,6 @@ static void rds_ib_remove_one(struct ib_device *device, void *client_data) { struct rds_ib_device *rds_ibdev = client_data; - if (!rds_ibdev) - return; - rds_ib_dev_shutdown(rds_ibdev); /* stop connection attempts from getting a reference to this device. */ diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e7e7c3c6e94a..2fad5f3fe093 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -547,18 +547,18 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) static struct ib_client smc_ib_client; /* callback function for ib_register_client() */ -static void smc_ib_add_dev(struct ib_device *ibdev) +static int smc_ib_add_dev(struct ib_device *ibdev) { struct smc_ib_device *smcibdev; u8 port_cnt; int i; if (ibdev->node_type != RDMA_NODE_IB_CA) - return; + return -EOPNOTSUPP; smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL); if (!smcibdev) - return; + return -ENOMEM; smcibdev->ibdev = ibdev; INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); @@ -583,6 +583,7 @@ static void smc_ib_add_dev(struct ib_device *ibdev) smcibdev->pnetid[i]); } schedule_work(&smcibdev->port_event_work); + return 0; } /* callback function for ib_unregister_client() */ @@ -590,9 +591,6 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { struct smc_ib_device *smcibdev = client_data; - if (!smcibdev || smcibdev->ibdev != ibdev) - return; - ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ spin_unlock(&smc_ib_devices.lock); -- cgit v1.2.3 From d5665a21250efeeb73579a2f8d71ee1820f37952 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 4 May 2020 08:19:31 +0300 Subject: RDMA/core: Add hash functions to calculate RoCEv2 flowlabel and UDP source port Add two hash functions to distribute RoCE v2 UDP source and Flowlabel symmetrically. These are user visible API and any change in the implementation needs to be tested for inter-operability between old and new variant. 
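A minimal user-space sketch mirroring the logic of the two helpers added below can be used to check the symmetry property described above; it is illustrative only (function names are shortened and the sample QPN values are arbitrary), not part of the patch:

/*
 * Illustrative mirror of the new helpers: both QPN orders yield the same
 * flow_label and therefore the same RoCE v2 UDP source port.
 */
#include <stdint.h>
#include <stdio.h>

#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN 0xC000
#define IB_GRH_FLOWLABEL_MASK            0x000FFFFF

static uint32_t calc_flow_label(uint32_t lqpn, uint32_t rqpn)
{
	uint64_t v = (uint64_t)lqpn * rqpn;	/* fold the two 24-bit QPNs */

	v ^= v >> 20;
	v ^= v >> 40;
	return (uint32_t)(v & IB_GRH_FLOWLABEL_MASK);	/* 20-bit flow label */
}

static uint16_t flow_label_to_udp_sport(uint32_t fl)
{
	uint32_t fl_low = fl & 0x03fff, fl_high = fl & 0xFC000;

	fl_low ^= fl_high >> 14;	/* fold the top 6 bits into the low 14 */
	return (uint16_t)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
}

int main(void)
{
	uint32_t lqpn = 0x000123, rqpn = 0x00abcd;
	uint32_t fl = calc_flow_label(lqpn, rqpn);

	/* calc_flow_label(rqpn, lqpn) is identical: multiplication commutes */
	printf("flow_label %#x, symmetric %d, udp sport %#x\n",
	       fl, fl == calc_flow_label(rqpn, lqpn),
	       flow_label_to_udp_sport(fl));
	return 0;
}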
Link: https://lore.kernel.org/r/20200504051935.269708-2-leon@kernel.org Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c3d715e2fc66..4c488cade70f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4709,4 +4709,48 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) bool rdma_dev_access_netns(const struct ib_device *device, const struct net *net); + +#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000) +#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF) + +/** + * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based + * on the flow_label + * + * This function will convert the 20 bit flow_label input to a valid RoCE v2 + * UDP src port 14 bit value. All RoCE V2 drivers should use this same + * convention. + */ +static inline u16 rdma_flow_label_to_udp_sport(u32 fl) +{ + u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000; + + fl_low ^= fl_high >> 14; + return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN); +} + +/** + * rdma_calc_flow_label - generate a RDMA symmetric flow label value based on + * local and remote qpn values + * + * This function folded the multiplication results of two qpns, 24 bit each, + * fields, and converts it to a 20 bit results. + * + * This function will create symmetric flow_label value based on the local + * and remote qpn values. this will allow both the requester and responder + * to calculate the same flow_label for a given connection. + * + * This helper function should be used by driver in case the upper layer + * provide a zero flow_label value. This is to improve entropy of RDMA + * traffic in the network. + */ +static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn) +{ + u64 v = (u64)lqpn * rqpn; + + v ^= v >> 20; + v ^= v >> 40; + + return (u32)(v & IB_GRH_FLOWLABEL_MASK); +} #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 0cb9e4f9e98af4b969b9140e964c646ad6ed7d40 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:53:42 -0500 Subject: IB/rdmavt: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. 
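As an illustrative sketch of the pattern being converted (foo/boo follow the example above; alloc_foo() is a hypothetical helper, not code from this patch), the flexible-array form pairs naturally with struct_size() for allocations:

#include <linux/overflow.h>
#include <linux/slab.h>

struct boo {
	int val;
};

struct foo {
	int stuff;
	struct boo array[];	/* was: struct boo array[0]; must stay last */
};

static struct foo *alloc_foo(unsigned int n)
{
	struct foo *p;

	/* sizeof(struct foo) + n * sizeof(struct boo), with overflow checking */
	p = kzalloc(struct_size(p, array, n), GFP_KERNEL);
	return p;
}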
This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Link: https://lore.kernel.org/r/20200507185342.GA14476@embeddedor Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- include/rdma/rdmavt_qp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 5fc10108703a..982bf2340840 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -440,7 +440,7 @@ struct rvt_qp { /* * This sge list MUST be last. Do not add anything below here. */ - struct rvt_sge r_sg_list[0] /* verified SGEs */ + struct rvt_sge r_sg_list[] /* verified SGEs */ ____cacheline_aligned_in_smp; }; -- cgit v1.2.3 From b0810b037de0b62a3c6e3abfc123fe2734335f53 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 6 May 2020 11:24:39 +0300 Subject: RDMA/core: Consolidate ib_create_srq flows The uverbs layer largely duplicate the code in ib_create_srq(), with the slight difference that it passes in a udata. Move all the code together into ib_create_srq_user() and provide an inline for kernel users, similar to other create calls. Link: https://lore.kernel.org/r/20200506082444.14502-6-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 40 ++++++------------------------------ drivers/infiniband/core/verbs.c | 29 +++++++++++++++++++------- include/rdma/ib_verbs.h | 27 +++++++++++------------- 3 files changed, 40 insertions(+), 56 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e03f3a43996b..d5642bcf93ee 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3444,38 +3444,15 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, attr.attr.srq_limit = cmd->srq_limit; INIT_LIST_HEAD(&obj->uevent.event_list); + obj->uevent.uobject.user_handle = cmd->user_handle; - srq = rdma_zalloc_drv_obj(ib_dev, ib_srq); - if (!srq) { - ret = -ENOMEM; - goto err_put; - } - - srq->device = pd->device; - srq->pd = pd; - srq->srq_type = cmd->srq_type; - srq->uobject = obj; - srq->event_handler = attr.event_handler; - - ret = pd->device->ops.create_srq(srq, &attr, udata); - if (ret) - goto err_free; - - if (ib_srq_has_cq(cmd->srq_type)) { - srq->ext.cq = attr.ext.cq; - atomic_inc(&attr.ext.cq->usecnt); - } - - if (cmd->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; - atomic_inc(&attr.ext.xrc.xrcd->usecnt); + srq = ib_create_srq_user(pd, &attr, obj, udata); + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); + goto err_put_pd; } - atomic_inc(&pd->usecnt); - atomic_set(&srq->usecnt, 0); - obj->uevent.uobject.object = srq; - obj->uevent.uobject.user_handle = cmd->user_handle; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; @@ -3501,13 +3478,8 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, err_copy: ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); - /* It was released in ib_destroy_srq_user */ - srq = NULL; -err_free: - kfree(srq); -err_put: +err_put_pd: uobj_put_obj_read(pd); - err_put_cq: if (ib_srq_has_cq(cmd->srq_type)) rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 
bf0249f76ae9..e2c9430a3ff1 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -981,15 +981,29 @@ EXPORT_SYMBOL(rdma_destroy_ah_user); /* Shared receive queues */ -struct ib_srq *ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr) +/** + * ib_create_srq_user - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * SRQ. If SRQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created SRQ. + * @uobject - uobject pointer if this is not a kernel SRQ + * @udata - udata pointer if this is not a kernel SRQ + * + * srq_attr->max_wr and srq_attr->max_sge are read the determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ib_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ib_create_srq_user(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_usrq_object *uobject, + struct ib_udata *udata) { struct ib_srq *srq; int ret; - if (!pd->device->ops.create_srq) - return ERR_PTR(-EOPNOTSUPP); - srq = rdma_zalloc_drv_obj(pd->device, ib_srq); if (!srq) return ERR_PTR(-ENOMEM); @@ -999,6 +1013,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; srq->srq_type = srq_init_attr->srq_type; + srq->uobject = uobject; if (ib_srq_has_cq(srq->srq_type)) { srq->ext.cq = srq_init_attr->ext.cq; @@ -1010,7 +1025,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, } atomic_inc(&pd->usecnt); - ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL); + ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); if (ret) { atomic_dec(&srq->pd->usecnt); if (srq->srq_type == IB_SRQT_XRC) @@ -1023,7 +1038,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, return srq; } -EXPORT_SYMBOL(ib_create_srq); +EXPORT_SYMBOL(ib_create_srq_user); int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4c488cade70f..db58f11552f1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3559,21 +3559,18 @@ static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags) return rdma_destroy_ah_user(ah, flags, NULL); } -/** - * ib_create_srq - Creates a SRQ associated with the specified protection - * domain. - * @pd: The protection domain associated with the SRQ. - * @srq_init_attr: A list of initial attributes required to create the - * SRQ. If SRQ creation succeeds, then the attributes are updated to - * the actual capabilities of the created SRQ. - * - * srq_attr->max_wr and srq_attr->max_sge are read the determine the - * requested size of the SRQ, and set to the actual values allocated - * on return. If ib_create_srq() succeeds, then max_wr and max_sge - * will always be at least as large as the requested values. 
- */ -struct ib_srq *ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr); +struct ib_srq *ib_create_srq_user(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_usrq_object *uobject, + struct ib_udata *udata); +static inline struct ib_srq * +ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr) +{ + if (!pd->device->ops.create_srq) + return ERR_PTR(-EOPNOTSUPP); + + return ib_create_srq_user(pd, srq_init_attr, NULL, NULL); +} /** * ib_modify_srq - Modifies the attributes for the specified SRQ. -- cgit v1.2.3 From d99dc602e2a55a99940ba9506a7126dfa54d54ea Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:05:48 -0400 Subject: IB/hfi1: Add functions to transmit datagram ipoib packets This patch implements the mechanism to accelerate the transmit side of a multiple transmit queue RDMA netdev by submitting the packets to the SDMA engine directly instead of sending through the verbs layer. This patch also changes the UD/SEND_ONLY op to output the entropy value in byte 0 of deth[1]. UD/SEND_ONLY_WITH_IMMEDIATE uses the previous behavior with no entropy value being output. The code in the ipoib rdma netdev which submits tx requests upon successful submission will call trace_sdma_output_ibhdr to output the ibhdr to the trace buffer. Link: https://lore.kernel.org/r/20200511160548.173205.45616.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/Makefile | 1 + drivers/infiniband/hw/hfi1/ipoib.h | 145 ++++++ drivers/infiniband/hw/hfi1/ipoib_tx.c | 828 ++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/trace.c | 10 +- include/rdma/ib_verbs.h | 1 + 5 files changed, 984 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/hfi1/ipoib.h create mode 100644 drivers/infiniband/hw/hfi1/ipoib_tx.c (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 0405d26d0833..09ef0b8b8ac7 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -22,6 +22,7 @@ hfi1-y := \ init.o \ intr.o \ iowait.o \ + ipoib_tx.o \ mad.o \ mmu_rb.o \ msix.o \ diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h new file mode 100644 index 000000000000..2b541abde266 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2020 Intel Corporation. 
+ * + */ + +/* + * This file contains HFI1 support for IPOIB functionality + */ + +#ifndef HFI1_IPOIB_H +#define HFI1_IPOIB_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hfi.h" +#include "iowait.h" + +#include + +#define HFI1_IPOIB_ENTROPY_SHIFT 24 + +#define HFI1_IPOIB_TXREQ_NAME_LEN 32 + +#define HFI1_IPOIB_ENCAP_LEN 4 + +struct hfi1_ipoib_dev_priv; + +union hfi1_ipoib_flow { + u16 as_int; + struct { + u8 tx_queue; + u8 sc5; + } __attribute__((__packed__)); +}; + +/** + * struct hfi1_ipoib_circ_buf - List of items to be processed + * @items: ring of items + * @head: ring head + * @tail: ring tail + * @max_items: max items + 1 that the ring can contain + * @producer_lock: producer sync lock + * @consumer_lock: consumer sync lock + */ +struct hfi1_ipoib_circ_buf { + void **items; + unsigned long head; + unsigned long tail; + unsigned long max_items; + spinlock_t producer_lock; /* head sync lock */ + spinlock_t consumer_lock; /* tail sync lock */ +}; + +/** + * struct hfi1_ipoib_txq - IPOIB per Tx queue information + * @priv: private pointer + * @sde: sdma engine + * @tx_list: tx request list + * @sent_txreqs: count of txreqs posted to sdma + * @flow: tracks when list needs to be flushed for a flow change + * @q_idx: ipoib Tx queue index + * @pkts_sent: indicator packets have been sent from this queue + * @wait: iowait structure + * @complete_txreqs: count of txreqs completed by sdma + * @napi: pointer to tx napi interface + * @tx_ring: ring of ipoib txreqs to be reaped by napi callback + */ +struct hfi1_ipoib_txq { + struct hfi1_ipoib_dev_priv *priv; + struct sdma_engine *sde; + struct list_head tx_list; + u64 sent_txreqs; + union hfi1_ipoib_flow flow; + u8 q_idx; + bool pkts_sent; + struct iowait wait; + + atomic64_t ____cacheline_aligned_in_smp complete_txreqs; + struct napi_struct *napi; + struct hfi1_ipoib_circ_buf tx_ring; +}; + +struct hfi1_ipoib_dev_priv { + struct hfi1_devdata *dd; + struct net_device *netdev; + struct ib_device *device; + struct hfi1_ipoib_txq *txqs; + struct kmem_cache *txreq_cache; + struct napi_struct *tx_napis; + u16 pkey; + u16 pkey_index; + u32 qkey; + u8 port_num; + + const struct net_device_ops *netdev_ops; + struct rvt_qp *qp; + struct pcpu_sw_netstats __percpu *netstats; +}; + +/* hfi1 ipoib rdma netdev's private data structure */ +struct hfi1_ipoib_rdma_netdev { + struct rdma_netdev rn; /* keep this first */ + /* followed by device private data */ + struct hfi1_ipoib_dev_priv dev_priv; +}; + +static inline struct hfi1_ipoib_dev_priv * +hfi1_ipoib_priv(const struct net_device *dev) +{ + return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv; +} + +static inline void +hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv, + u64 packets, + u64 bytes) +{ + struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats); + + u64_stats_update_begin(&netstats->syncp); + netstats->tx_packets += packets; + netstats->tx_bytes += bytes; + u64_stats_update_end(&netstats->syncp); +} + +int hfi1_ipoib_send_dma(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn); + +int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv); +void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv); + +void hfi1_ipoib_napi_tx_enable(struct net_device *dev); +void hfi1_ipoib_napi_tx_disable(struct net_device *dev); + +#endif /* _IPOIB_H */ diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c new file mode 100644 index 
000000000000..883cb9d48022 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -0,0 +1,828 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2020 Intel Corporation. + * + */ + +/* + * This file contains HFI1 support for IPOIB SDMA functionality + */ + +#include +#include + +#include "sdma.h" +#include "verbs.h" +#include "trace_ibhdrs.h" +#include "ipoib.h" + +/* Add a convenience helper */ +#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1)) +#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size) +#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size) + +/** + * struct ipoib_txreq - IPOIB transmit descriptor + * @txreq: sdma transmit request + * @sdma_hdr: 9b ib headers + * @sdma_status: status returned by sdma engine + * @priv: ipoib netdev private data + * @txq: txq on which skb was output + * @skb: skb to send + */ +struct ipoib_txreq { + struct sdma_txreq txreq; + struct hfi1_sdma_header sdma_hdr; + int sdma_status; + struct hfi1_ipoib_dev_priv *priv; + struct hfi1_ipoib_txq *txq; + struct sk_buff *skb; +}; + +struct ipoib_txparms { + struct hfi1_devdata *dd; + struct rdma_ah_attr *ah_attr; + struct hfi1_ibport *ibp; + struct hfi1_ipoib_txq *txq; + union hfi1_ipoib_flow flow; + u32 dqpn; + u8 hdr_dwords; + u8 entropy; +}; + +static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) +{ + return sent - completed; +} + +static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq) +{ + if (unlikely(hfi1_ipoib_txreqs(++txq->sent_txreqs, + atomic64_read(&txq->complete_txreqs)) >= + min_t(unsigned int, txq->priv->netdev->tx_queue_len, + txq->tx_ring.max_items - 1))) + netif_stop_subqueue(txq->priv->netdev, txq->q_idx); +} + +static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) +{ + struct net_device *dev = txq->priv->netdev; + + /* If the queue is already running just return */ + if (likely(!__netif_subqueue_stopped(dev, txq->q_idx))) + return; + + /* If shutting down just return as queue state is irrelevant */ + if (unlikely(dev->reg_state != NETREG_REGISTERED)) + return; + + /* + * When the queue has been drained to less than half full it will be + * restarted. + * The size of the txreq ring is fixed at initialization. + * The tx queue len can be adjusted upward while the interface is + * running. + * The tx queue len can be large enough to overflow the txreq_ring. + * Use the minimum of the current tx_queue_len or the rings max txreqs + * to protect against ring overflow. 
+ */ + if (hfi1_ipoib_txreqs(txq->sent_txreqs, + atomic64_read(&txq->complete_txreqs)) + < min_t(unsigned int, dev->tx_queue_len, + txq->tx_ring.max_items) >> 1) + netif_wake_subqueue(dev, txq->q_idx); +} + +static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) +{ + struct hfi1_ipoib_dev_priv *priv = tx->priv; + + if (likely(!tx->sdma_status)) { + hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len); + } else { + ++priv->netdev->stats.tx_errors; + dd_dev_warn(priv->dd, + "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n", + __func__, tx->sdma_status, + le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx, + tx->txq->sde->this_idx); + } + + napi_consume_skb(tx->skb, budget); + sdma_txclean(priv->dd, &tx->txreq); + kmem_cache_free(priv->txreq_cache, tx); +} + +static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget) +{ + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; + unsigned long head; + unsigned long tail; + unsigned int max_tx; + int work_done; + int tx_count; + + spin_lock_bh(&tx_ring->consumer_lock); + + /* Read index before reading contents at that index. */ + head = smp_load_acquire(&tx_ring->head); + tail = tx_ring->tail; + max_tx = tx_ring->max_items; + + work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget); + + for (tx_count = work_done; tx_count; tx_count--) { + hfi1_ipoib_free_tx(tx_ring->items[tail], budget); + tail = CIRC_NEXT(tail, max_tx); + } + + atomic64_add(work_done, &txq->complete_txreqs); + + /* Finished freeing tx items so store the tail value. */ + smp_store_release(&tx_ring->tail, tail); + + spin_unlock_bh(&tx_ring->consumer_lock); + + hfi1_ipoib_check_queue_stopped(txq); + + return work_done; +} + +static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev); + struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; + + int work_done = hfi1_ipoib_drain_tx_ring(txq, budget); + + if (work_done < budget) + napi_complete_done(napi, work_done); + + return work_done; +} + +static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx) +{ + struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring; + unsigned long head; + unsigned long tail; + size_t max_tx; + + spin_lock(&tx_ring->producer_lock); + + head = tx_ring->head; + tail = READ_ONCE(tx_ring->tail); + max_tx = tx_ring->max_items; + + if (likely(CIRC_SPACE(head, tail, max_tx))) { + tx_ring->items[head] = tx; + + /* Finish storing txreq before incrementing head. 
*/ + smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx)); + napi_schedule(tx->txq->napi); + } else { + struct hfi1_ipoib_txq *txq = tx->txq; + struct hfi1_ipoib_dev_priv *priv = tx->priv; + + /* Ring was full */ + hfi1_ipoib_free_tx(tx, 0); + atomic64_inc(&txq->complete_txreqs); + dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx); + } + + spin_unlock(&tx_ring->producer_lock); +} + +static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status) +{ + struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq); + + tx->sdma_status = status; + + hfi1_ipoib_add_tx(tx); +} + +static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_devdata *dd = txp->dd; + struct sdma_txreq *txreq = &tx->txreq; + struct sk_buff *skb = tx->skb; + int ret = 0; + int i; + + if (skb_headlen(skb)) { + ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb)); + if (unlikely(ret)) + return ret; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + ret = sdma_txadd_page(dd, + txreq, + skb_frag_page(frag), + frag->bv_offset, + skb_frag_size(frag)); + if (unlikely(ret)) + break; + } + + return ret; +} + +static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_devdata *dd = txp->dd; + struct sdma_txreq *txreq = &tx->txreq; + struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + u16 pkt_bytes = + sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len; + int ret; + + ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete); + if (unlikely(ret)) + return ret; + + /* add pbc + headers */ + ret = sdma_txadd_kvaddr(dd, + txreq, + sdma_hdr, + sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2)); + if (unlikely(ret)) + return ret; + + /* add the ulp payload */ + return hfi1_ipoib_build_ulp_payload(tx, txp); +} + +static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = tx->priv; + struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + struct sk_buff *skb = tx->skb; + struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); + struct rdma_ah_attr *ah_attr = txp->ah_attr; + struct ib_other_headers *ohdr; + struct ib_grh *grh; + u16 dwords; + u16 slid; + u16 dlid; + u16 lrh0; + u32 bth0; + u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 | + priv->netdev->dev_addr[2] << 8 | + priv->netdev->dev_addr[3]); + u16 payload_dwords; + u8 pad_cnt; + + pad_cnt = -skb->len & 3; + + /* Includes ICRC */ + payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC; + + /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. 
*/ + txp->hdr_dwords = 7; + + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { + grh = &sdma_hdr->hdr.ibh.u.l.grh; + txp->hdr_dwords += + hfi1_make_grh(txp->ibp, + grh, + rdma_ah_read_grh(ah_attr), + txp->hdr_dwords - LRH_9B_DWORDS, + payload_dwords); + lrh0 = HFI1_LRH_GRH; + ohdr = &sdma_hdr->hdr.ibh.u.l.oth; + } else { + lrh0 = HFI1_LRH_BTH; + ohdr = &sdma_hdr->hdr.ibh.u.oth; + } + + lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; + lrh0 |= (txp->flow.sc5 & 0xf) << 12; + + dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B); + if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } else { + u16 lid = (u16)ppd->lid; + + if (lid) { + lid |= rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1); + slid = lid; + } else { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } + } + + /* Includes ICRC */ + dwords = txp->hdr_dwords + payload_dwords; + + /* Build the lrh */ + sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B; + hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid); + + /* Build the bth */ + bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey; + + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(txp->dqpn); + ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs)); + + /* Build the deth */ + ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey); + ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy << + HFI1_IPOIB_ENTROPY_SHIFT) | sqpn); + + /* Construct the pbc. */ + sdma_hdr->pbc = + cpu_to_le64(create_pbc(ppd, + ib_is_sc5(txp->flow.sc5) << + PBC_DC_INFO_SHIFT, + 0, + sc_to_vlt(priv->dd, txp->flow.sc5), + dwords - SIZE_OF_CRC + + (sizeof(sdma_hdr->pbc) >> 2))); +} + +static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct ipoib_txreq *tx; + int ret; + + tx = kmem_cache_alloc_node(priv->txreq_cache, + GFP_ATOMIC, + priv->dd->node); + if (unlikely(!tx)) + return ERR_PTR(-ENOMEM); + + /* so that we can test if the sdma decriptors are there */ + tx->txreq.num_desc = 0; + tx->priv = priv; + tx->txq = txp->txq; + tx->skb = skb; + + hfi1_ipoib_build_ib_tx_headers(tx, txp); + + ret = hfi1_ipoib_build_tx_desc(tx, txp); + if (likely(!ret)) { + if (txp->txq->flow.as_int != txp->flow.as_int) { + txp->txq->flow.tx_queue = txp->flow.tx_queue; + txp->txq->flow.sc5 = txp->flow.sc5; + txp->txq->sde = + sdma_select_engine_sc(priv->dd, + txp->flow.tx_queue, + txp->flow.sc5); + } + + return tx; + } + + sdma_txclean(priv->dd, &tx->txreq); + kmem_cache_free(priv->txreq_cache, tx); + + return ERR_PTR(ret); +} + +static int hfi1_ipoib_submit_tx_list(struct net_device *dev, + struct hfi1_ipoib_txq *txq) +{ + int ret; + u16 count_out; + + ret = sdma_send_txlist(txq->sde, + iowait_get_ib_work(&txq->wait), + &txq->tx_list, + &count_out); + if (likely(!ret) || ret == -EBUSY || ret == -ECOMM) + return ret; + + dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret); + + return ret; +} + +static int hfi1_ipoib_flush_tx_list(struct net_device *dev, + struct hfi1_ipoib_txq *txq) +{ + int ret = 0; + + if (!list_empty(&txq->tx_list)) { + /* Flush the current list */ + ret = hfi1_ipoib_submit_tx_list(dev, txq); + + if (unlikely(ret)) + if (ret != -EBUSY) + ++dev->stats.tx_carrier_errors; + } + + return ret; +} + +static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq, + struct ipoib_txreq *tx) +{ + int ret; + + ret = sdma_send_txreq(txq->sde, + iowait_get_ib_work(&txq->wait), + &tx->txreq, + 
txq->pkts_sent); + if (likely(!ret)) { + txq->pkts_sent = true; + iowait_starve_clear(txq->pkts_sent, &txq->wait); + } + + return ret; +} + +static int hfi1_ipoib_send_dma_single(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct hfi1_ipoib_txq *txq = txp->txq; + struct ipoib_txreq *tx; + int ret; + + tx = hfi1_ipoib_send_dma_common(dev, skb, txp); + if (IS_ERR(tx)) { + int ret = PTR_ERR(tx); + + dev_kfree_skb_any(skb); + + if (ret == -ENOMEM) + ++dev->stats.tx_errors; + else + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; + } + + ret = hfi1_ipoib_submit_tx(txq, tx); + if (likely(!ret)) { + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + txq->pkts_sent = false; + + if (ret == -EBUSY) { + list_add_tail(&tx->txreq.list, &txq->tx_list); + + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + if (ret == -ECOMM) { + hfi1_ipoib_check_queue_depth(txq); + return NETDEV_TX_OK; + } + + sdma_txclean(priv->dd, &tx->txreq); + dev_kfree_skb_any(skb); + kmem_cache_free(priv->txreq_cache, tx); + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; +} + +static int hfi1_ipoib_send_dma_list(struct net_device *dev, + struct sk_buff *skb, + struct ipoib_txparms *txp) +{ + struct hfi1_ipoib_txq *txq = txp->txq; + struct ipoib_txreq *tx; + + /* Has the flow change ? */ + if (txq->flow.as_int != txp->flow.as_int) + (void)hfi1_ipoib_flush_tx_list(dev, txq); + + tx = hfi1_ipoib_send_dma_common(dev, skb, txp); + if (IS_ERR(tx)) { + int ret = PTR_ERR(tx); + + dev_kfree_skb_any(skb); + + if (ret == -ENOMEM) + ++dev->stats.tx_errors; + else + ++dev->stats.tx_carrier_errors; + + return NETDEV_TX_OK; + } + + list_add_tail(&tx->txreq.list, &txq->tx_list); + + hfi1_ipoib_check_queue_depth(txq); + + trace_sdma_output_ibhdr(tx->priv->dd, + &tx->sdma_hdr.hdr, + ib_is_sc5(txp->flow.sc5)); + + if (!netdev_xmit_more()) + (void)hfi1_ipoib_flush_tx_list(dev, txq); + + return NETDEV_TX_OK; +} + +static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb) +{ + if (skb_transport_header_was_set(skb)) { + u8 *hdr = (u8 *)skb_transport_header(skb); + + return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]); + } + + return (u8)skb_get_queue_mapping(skb); +} + +int hfi1_ipoib_send_dma(struct net_device *dev, + struct sk_buff *skb, + struct ib_ah *address, + u32 dqpn) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct ipoib_txparms txp; + struct rdma_netdev *rn = netdev_priv(dev); + + if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) { + dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n", + skb->len, + rn->mtu + HFI1_IPOIB_ENCAP_LEN); + ++dev->stats.tx_dropped; + ++dev->stats.tx_errors; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + txp.dd = priv->dd; + txp.ah_attr = &ibah_to_rvtah(address)->attr; + txp.ibp = to_iport(priv->device, priv->port_num); + txp.txq = &priv->txqs[skb_get_queue_mapping(skb)]; + txp.dqpn = dqpn; + txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)]; + txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb); + txp.entropy = hfi1_ipoib_calc_entropy(skb); + + if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list)) + return hfi1_ipoib_send_dma_list(dev, skb, &txp); + + return hfi1_ipoib_send_dma_single(dev, skb, &txp); +} + +/* + * hfi1_ipoib_sdma_sleep 
- ipoib sdma sleep function + * + * This function gets called from sdma_send_txreq() when there are not enough + * sdma descriptors available to send the packet. It adds Tx queue's wait + * structure to sdma engine's dmawait list to be woken up when descriptors + * become available. + */ +static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde, + struct iowait_work *wait, + struct sdma_txreq *txreq, + uint seq, + bool pkts_sent) +{ + struct hfi1_ipoib_txq *txq = + container_of(wait->iow, struct hfi1_ipoib_txq, wait); + + write_seqlock(&sde->waitlock); + + if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) { + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&sde->waitlock); + return -EAGAIN; + } + + netif_stop_subqueue(txq->priv->netdev, txq->q_idx); + + if (list_empty(&txq->wait.list)) + iowait_queue(pkts_sent, wait->iow, &sde->dmawait); + + write_sequnlock(&sde->waitlock); + return -EBUSY; + } + + write_sequnlock(&sde->waitlock); + return -EINVAL; +} + +/* + * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function + * + * This function gets called when SDMA descriptors becomes available and Tx + * queue's wait structure was previously added to sdma engine's dmawait list. + */ +static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason) +{ + struct hfi1_ipoib_txq *txq = + container_of(wait, struct hfi1_ipoib_txq, wait); + + if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) + iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND); +} + +static void hfi1_ipoib_flush_txq(struct work_struct *work) +{ + struct iowait_work *ioww = + container_of(work, struct iowait_work, iowork); + struct iowait *wait = iowait_ioww_to_iow(ioww); + struct hfi1_ipoib_txq *txq = + container_of(wait, struct hfi1_ipoib_txq, wait); + struct net_device *dev = txq->priv->netdev; + + if (likely(dev->reg_state == NETREG_REGISTERED) && + likely(__netif_subqueue_stopped(dev, txq->q_idx)) && + likely(!hfi1_ipoib_flush_tx_list(dev, txq))) + netif_wake_subqueue(dev, txq->q_idx); +} + +int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) +{ + struct net_device *dev = priv->netdev; + char buf[HFI1_IPOIB_TXREQ_NAME_LEN]; + unsigned long tx_ring_size; + int i; + + /* + * Ring holds 1 less than tx_ring_size + * Round up to next power of 2 in order to hold at least tx_queue_len + */ + tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1); + + snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit); + priv->txreq_cache = kmem_cache_create(buf, + sizeof(struct ipoib_txreq), + 0, + 0, + NULL); + if (!priv->txreq_cache) + return -ENOMEM; + + priv->tx_napis = kcalloc_node(dev->num_tx_queues, + sizeof(struct napi_struct), + GFP_ATOMIC, + priv->dd->node); + if (!priv->tx_napis) + goto free_txreq_cache; + + priv->txqs = kcalloc_node(dev->num_tx_queues, + sizeof(struct hfi1_ipoib_txq), + GFP_ATOMIC, + priv->dd->node); + if (!priv->txqs) + goto free_tx_napis; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + iowait_init(&txq->wait, + 0, + hfi1_ipoib_flush_txq, + NULL, + hfi1_ipoib_sdma_sleep, + hfi1_ipoib_sdma_wakeup, + NULL, + NULL); + txq->priv = priv; + txq->sde = NULL; + INIT_LIST_HEAD(&txq->tx_list); + atomic64_set(&txq->complete_txreqs, 0); + txq->q_idx = i; + txq->flow.tx_queue = 0xff; + txq->flow.sc5 = 0xff; + txq->pkts_sent = false; + + netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i), + priv->dd->node); + + txq->tx_ring.items = + vzalloc_node(array_size(tx_ring_size, + sizeof(struct 
ipoib_txreq)), + priv->dd->node); + if (!txq->tx_ring.items) + goto free_txqs; + + spin_lock_init(&txq->tx_ring.producer_lock); + spin_lock_init(&txq->tx_ring.consumer_lock); + txq->tx_ring.max_items = tx_ring_size; + + txq->napi = &priv->tx_napis[i]; + netif_tx_napi_add(dev, txq->napi, + hfi1_ipoib_process_tx_ring, + NAPI_POLL_WEIGHT); + } + + return 0; + +free_txqs: + for (i--; i >= 0; i--) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + netif_napi_del(txq->napi); + vfree(txq->tx_ring.items); + } + + kfree(priv->txqs); + priv->txqs = NULL; + +free_tx_napis: + kfree(priv->tx_napis); + priv->tx_napis = NULL; + +free_txreq_cache: + kmem_cache_destroy(priv->txreq_cache); + priv->txreq_cache = NULL; + return -ENOMEM; +} + +static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) +{ + struct sdma_txreq *txreq; + struct sdma_txreq *txreq_tmp; + atomic64_t *complete_txreqs = &txq->complete_txreqs; + + list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) { + struct ipoib_txreq *tx = + container_of(txreq, struct ipoib_txreq, txreq); + + list_del(&txreq->list); + sdma_txclean(txq->priv->dd, &tx->txreq); + dev_kfree_skb_any(tx->skb); + kmem_cache_free(txq->priv->txreq_cache, tx); + atomic64_inc(complete_txreqs); + } + + if (hfi1_ipoib_txreqs(txq->sent_txreqs, atomic64_read(complete_txreqs))) + dd_dev_warn(txq->priv->dd, + "txq %d not empty found %llu requests\n", + txq->q_idx, + hfi1_ipoib_txreqs(txq->sent_txreqs, + atomic64_read(complete_txreqs))); +} + +void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) +{ + int i; + + for (i = 0; i < priv->netdev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + iowait_cancel_work(&txq->wait); + iowait_sdma_drain(&txq->wait); + hfi1_ipoib_drain_tx_list(txq); + netif_napi_del(txq->napi); + (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + vfree(txq->tx_ring.items); + } + + kfree(priv->txqs); + priv->txqs = NULL; + + kfree(priv->tx_napis); + priv->tx_napis = NULL; + + kmem_cache_destroy(priv->txreq_cache); + priv->txreq_cache = NULL; +} + +void hfi1_ipoib_napi_tx_enable(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + napi_enable(txq->napi); + } +} + +void hfi1_ipoib_napi_tx_disable(struct net_device *dev) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + + napi_disable(txq->napi); + (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + } +} diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 9a3d236bcc88..c8a9988d972d 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -47,6 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" #include "exp_rcv.h" +#include "ipoib.h" static u8 __get_ib_hdr_len(struct ib_header *hdr) { @@ -126,6 +127,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2) #define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" #define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" #define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" +#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x" #define IETH_PRN "ieth rkey:0x%.8x" #define ATOMICACKETH_PRN "origdata:%llx" #define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" @@ -444,6 +446,12 @@ const char *parse_everbs_hdrs( break; /* deth */ case OP(UD, SEND_ONLY): + trace_seq_printf(p, DETH_ENTROPY_PRN, + be32_to_cpu(eh->ud.deth[0]), + be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK, + be32_to_cpu(eh->ud.deth[1]) >> + HFI1_IPOIB_ENTROPY_SHIFT); + break; case OP(UD, SEND_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, DETH_PRN, be32_to_cpu(eh->ud.deth[0]), diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index db58f11552f1..029541a8faeb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2205,6 +2205,7 @@ struct rdma_netdev { void *clnt_priv; struct ib_device *hca; u8 port_num; + int mtu; /* * cleanup function must be specified. -- cgit v1.2.3 From 84e3b19a27f8f37c8cf98f8b7cdf3f8674bf8e97 Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:06:00 -0400 Subject: IB/hfi1: Remove module parameter for KDETH qpns The module parameter for KDETH qpns is being removed in favor of always using the default value of 0x80 as the qpn prefix. Defines have been added for various KDETH values including the prefix of 0x80. The reserved range now starts at the base value for KDETH qpns (0x80) and extends up to and including the last qpn for other reserved QP prefixed types. Adjust other QP prefixed define names to match KDETH defined names. Link: https://lore.kernel.org/r/20200511160600.173205.27508.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 19 +++---------------- drivers/infiniband/hw/hfi1/common.h | 7 ------- drivers/infiniband/hw/hfi1/file_ops.c | 4 ++-- drivers/infiniband/hw/hfi1/hfi.h | 3 +-- drivers/infiniband/hw/hfi1/tid_rdma.c | 4 ++-- drivers/infiniband/hw/hfi1/verbs.c | 7 +++---- include/rdma/rdmavt_qp.h | 29 ++++++++++++++++++++++++++++- 7 files changed, 39 insertions(+), 34 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e0b1238d31df..c08bf813d6fa 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -67,10 +67,6 @@ #include "debugfs.h" #include "fault.h" -uint kdeth_qp; -module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); -MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); - uint num_vls = HFI1_MAX_VLS_SUPPORTED; module_param(num_vls, uint, S_IRUGO); MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)"); @@ -14119,21 +14115,12 @@ static void init_early_variables(struct hfi1_devdata *dd) static void init_kdeth_qp(struct hfi1_devdata *dd) { - /* user changed the KDETH_QP */ - if (kdeth_qp != 0 && kdeth_qp >= 0xff) { - /* out of range or illegal value */ - dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring"); - kdeth_qp = 0; - } - if (kdeth_qp == 0) /* not set, or failed range check */ - kdeth_qp = DEFAULT_KDETH_QP; - write_csr(dd, SEND_BTH_QP, - (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) << + (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) << SEND_BTH_QP_KDETH_QP_SHIFT); write_csr(dd, RCV_BTH_QP, - (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) << + (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) << RCV_BTH_QP_KDETH_QP_SHIFT); } diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 1f7107e35a43..606254513640 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -72,13 +72,6 @@ * compilation unit */ -/* - * If a packet's QP[23:16] bits match this value, then it is - * a PSM packet and the hardware will expect a KDETH header - * following the BTH. - */ -#define DEFAULT_KDETH_QP 0x80 - /* driver/hw feature set bitmask */ #define HFI1_CAP_USER_SHIFT 24 #define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e7fdd70c6e78..8ca51e43cf53 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1264,7 +1264,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; binfo.sw_version = HFI1_KERN_SWVERSION; - binfo.bthqp = kdeth_qp; + binfo.bthqp = RVT_KDETH_QP_PREFIX; binfo.jkey = uctxt->jkey; /* * If more than 64 contexts are enabled the allocated credit diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b06c2594105a..ed13051d38da 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015-2018 Intel Corporation. + * Copyright(c) 2015-2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -2250,7 +2250,6 @@ extern int num_user_contexts; extern unsigned long n_krcvqs; extern uint krcvqs[]; extern int krcvqsset; -extern uint kdeth_qp; extern uint loopback; extern uint quick_linkup; extern uint rcv_intr_timeout; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 8a2e0d9351e9..243b4ba0b6f6 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* - * Copyright(c) 2018 Intel Corporation. 
+ * Copyright(c) 2018 - 2020 Intel Corporation. * */ @@ -194,7 +194,7 @@ void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p) { struct hfi1_qp_priv *priv = qp->priv; - p->qp = (kdeth_qp << 16) | priv->rcd->ctxt; + p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt; p->max_len = TID_RDMA_MAX_SEGMENT_SIZE; p->jkey = priv->rcd->jkey; p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2f6323ad9c59..c1c6fa986cd1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2018 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1863,9 +1863,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.qpn_start = 0; dd->verbs_dev.rdi.dparms.qpn_inc = 1; dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; - dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; - dd->verbs_dev.rdi.dparms.qpn_res_end = - dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; + dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE; + dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX; dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 982bf2340840..c4369a6c2951 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2016 - 2019 Intel Corporation. + * Copyright(c) 2016 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -68,6 +68,33 @@ #define RVT_R_RSP_SEND 0x08 #define RVT_R_COMM_EST 0x10 +/* + * If a packet's QP[23:16] bits match this value, then it is + * a PSM packet and the hardware will expect a KDETH header + * following the BTH. + */ +#define RVT_KDETH_QP_PREFIX 0x80 +#define RVT_KDETH_QP_SUFFIX 0xffff +#define RVT_KDETH_QP_PREFIX_MASK 0x00ff0000 +#define RVT_KDETH_QP_PREFIX_SHIFT 16 +#define RVT_KDETH_QP_BASE (u32)(RVT_KDETH_QP_PREFIX << \ + RVT_KDETH_QP_PREFIX_SHIFT) +#define RVT_KDETH_QP_MAX (u32)(RVT_KDETH_QP_BASE + RVT_KDETH_QP_SUFFIX) + +/* + * If a packet's LNH == BTH and DEST QPN[23:16] in the BTH match this + * prefix value, then it is an AIP packet with a DETH containing the entropy + * value in byte 4 following the BTH. + */ +#define RVT_AIP_QP_PREFIX 0x81 +#define RVT_AIP_QP_SUFFIX 0xffff +#define RVT_AIP_QP_PREFIX_MASK 0x00ff0000 +#define RVT_AIP_QP_PREFIX_SHIFT 16 +#define RVT_AIP_QP_BASE (u32)(RVT_AIP_QP_PREFIX << \ + RVT_AIP_QP_PREFIX_SHIFT) +#define RVT_AIP_QPN_MAX BIT(RVT_AIP_QP_PREFIX_SHIFT) +#define RVT_AIP_QP_MAX (u32)(RVT_AIP_QP_BASE + RVT_AIP_QPN_MAX - 1) + /* * Bit definitions for s_flags. * -- cgit v1.2.3 From 7f90a5a069f8dff9c76505b9853f95667d117c15 Mon Sep 17 00:00:00 2001 From: Gary Leshner Date: Mon, 11 May 2020 12:06:07 -0400 Subject: IB/{rdmavt, hfi1}: Implement creation of accelerated UD QPs Adds capability to create a qpn to be recognized as an accelerated UD QP for ipoib. This is accomplished by reserving 0x81 in byte[0] of the qpn as the prefix for these qp types and reserving qpns between 0x810000 and 0x81ffff. 
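As an aside (not part of the patch), the reserved range follows directly from the RVT_AIP_* values introduced in the previous patch; a minimal sketch of the qpn arithmetic, using an arbitrary 16-bit allocator result of 0x0042:

    u32 base = 0x81 << 16;              /* RVT_AIP_QP_BASE  == 0x810000 */
    u32 max  = base + (1 << 16) - 1;    /* RVT_AIP_QP_MAX   == 0x81ffff */
    u32 qpn  = base | 0x0042;           /* accelerated UD qpn handed to the ULP */

    /* Freeing strips the prefix so the low 16 bits can be reused: */
    if ((qpn & 0x00ff0000) == base)     /* RVT_AIP_QP_PREFIX_MASK test */
            qpn &= 0xffff;              /* RVT_AIP_QP_SUFFIX */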
The hfi1 capability mask already contained a flag for the VNIC netdev. This has been renamed and extended to include both VNIC and ipoib. The rvt code to allocate qps now recognizes this flag and sets 0x81 into byte[0] of the qpn. The code to allocate qpns is modified to reset the qpn numbering when it is detected that a value is located in byte[0] for a UD QP and it is a qpn being requested for net dev use. If it is a regular UD QP then it is allowable to have bits set in byte[0] of the qpn and provide the previously normal behavior. The code to free the qpn now checks for the AIP prefix value of 0x81 and removes it from the qpn before being freed so that the lower 16 bit number can be reused. This patch requires minor changes in the IB core and ipoib to facilitate the creation of accelerated UP QPs. Link: https://lore.kernel.org/r/20200511160607.173205.11757.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Gary Leshner Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/verbs.c | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 24 +++++++++++++++++++----- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3 +++ include/rdma/ib_verbs.h | 4 ++-- include/rdma/opa_vnic.h | 4 ++-- 5 files changed, 27 insertions(+), 10 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c1c6fa986cd1..c61b2916d420 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1342,7 +1342,7 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | - IB_DEVICE_RDMA_NETDEV_OPA_VNIC; + IB_DEVICE_RDMA_NETDEV_OPA; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0e1b291d2cec..91ad6c571080 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 - 2019 Intel Corporation. + * Copyright(c) 2016 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -525,15 +525,18 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * @rdi: rvt device info structure * @qpt: queue pair number table pointer * @port_num: IB port number, 1 based, comes from core + * @exclude_prefix: prefix of special queue pair number being allocated * * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num) + enum ib_qp_type type, u8 port_num, u8 exclude_prefix) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; + u32 max_qpn = exclude_prefix == RVT_AIP_QP_PREFIX ? 
+ RVT_AIP_QPN_MAX : RVT_QPN_MAX; if (rdi->driver_f.alloc_qpn) return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num); @@ -553,7 +556,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, } qpn = qpt->last + qpt->incr; - if (qpn >= RVT_QPN_MAX) + if (qpn >= max_qpn) qpn = qpt->incr | ((qpt->last & 1) ^ 1); /* offset carries bit 0 */ offset = qpn & RVT_BITS_PER_PAGE_MASK; @@ -987,6 +990,9 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; + if ((qpn & RVT_AIP_QP_PREFIX_MASK) == RVT_AIP_QP_BASE) + qpn &= RVT_AIP_QP_SUFFIX; + map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE; if (map->page) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); @@ -1074,13 +1080,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; size_t sqsize; + u8 exclude_prefix = 0; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags) + (init_attr->create_flags && + init_attr->create_flags != IB_QP_CREATE_NETDEV_USE)) return ERR_PTR(-EINVAL); /* Check receive queue parameters if no SRQ is specified. */ @@ -1199,14 +1207,20 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_driver_priv; } + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) + exclude_prefix = RVT_AIP_QP_PREFIX; + err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, + exclude_prefix); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; } qp->ibqp.qp_num = err; + if (init_attr->create_flags & IB_QP_CREATE_NETDEV_USE) + qp->ibqp.qp_num |= RVT_AIP_QP_BASE; qp->port_num = init_attr->port_num; rvt_init_qp(rdi, qp, init_attr->qp_type); if (rdi->driver_f.qp_priv_init) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index b69304d28f06..587252fd6f57 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -206,6 +206,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; + if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA) + init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE; + priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { pr_warn("%s: failed to create QP\n", ca->name); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 029541a8faeb..6278e4e040fc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -305,7 +305,7 @@ enum ib_device_cap_flags { IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), - IB_DEVICE_RDMA_NETDEV_OPA_VNIC = (1ULL << 35), + IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35), /* The device supports padding incoming writes to cacheline. 
*/ IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), @@ -1117,7 +1117,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_INTEGRITY_EN = 1 << 6, - /* FREE = 1 << 7, */ + IB_QP_CREATE_NETDEV_USE = 1 << 7, IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, IB_QP_CREATE_SOURCE_QPN = 1 << 10, diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index e90b149fe92a..6f244e759b4f 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -1,7 +1,7 @@ #ifndef _OPA_VNIC_H #define _OPA_VNIC_H /* - * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -132,7 +132,7 @@ struct opa_vnic_stats { static inline bool rdma_cap_opa_vnic(struct ib_device *device) { return !!(device->attrs.device_cap_flags & - IB_DEVICE_RDMA_NETDEV_OPA_VNIC); + IB_DEVICE_RDMA_NETDEV_OPA); } #endif /* _OPA_VNIC_H */ -- cgit v1.2.3 From 6d72344cf6c47010cc2055a832e16c7fcdd16f82 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 11 May 2020 12:06:18 -0400 Subject: IB/ipoib: Increase ipoib Datagram mode MTU's upper limit Currently the ipoib UD mtu is restricted to 4K bytes. Remove this limitation so that the IPOIB module can potentially use an MTU (in UD mode) that is bounded by the MTU of the underlying device. A field is added to the ib_port_attr structure to indicate the maximum physical MTU the underlying device supports. Link: https://lore.kernel.org/r/20200511160618.173205.23053.stgit@awfm-01.aw.intel.com Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Sadanand Warrier Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/qp.c | 18 +----- drivers/infiniband/hw/hfi1/verbs.c | 2 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 11 ++-- include/rdma/ib_verbs.h | 77 ++++++++++++++++++++++++++ include/rdma/opa_port_info.h | 10 +--- 6 files changed, 88 insertions(+), 32 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index f8e733aa3bb8..0c2ae9f7b3e8 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2019 Intel Corporation. + * Copyright(c) 2015 - 2020 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -186,15 +186,6 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -static inline int opa_mtu_enum_to_int(int mtu) -{ - switch (mtu) { - case OPA_MTU_8192: return 8192; - case OPA_MTU_10240: return 10240; - default: return -1; - } -} - /** * This function is what we would push to the core layer if we wanted to be a * "first class citizen". 
Instead we hide this here and rely on Verbs ULPs @@ -202,15 +193,10 @@ static inline int opa_mtu_enum_to_int(int mtu) */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { - int val; - /* Constraining 10KB packets to 8KB packets */ if (mtu == (enum ib_mtu)OPA_MTU_10240) mtu = OPA_MTU_8192; - val = opa_mtu_enum_to_int((int)mtu); - if (val > 0) - return val; - return ib_mtu_enum_to_int(mtu); + return opa_mtu_enum_to_int((enum opa_mtu)mtu); } int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c61b2916d420..19d5d0061b01 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1439,6 +1439,8 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); + props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu : + ib_mtu_enum_to_int(props->max_mtu); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d4c6a97ce4c0..22216f181b24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1855,7 +1855,7 @@ static int ipoib_parent_init(struct net_device *ndev) priv->port); return result; } - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr); result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey); if (result) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index b9e9562f5034..7166ee9b7a25 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -218,6 +218,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, struct rdma_ah_attr av; int ret; int set_qkey = 0; + int mtu; mcast->mcmember = *mcmember; @@ -240,13 +241,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, priv->broadcast->mcmember.flow_label = mcmember->flow_label; priv->broadcast->mcmember.hop_limit = mcmember->hop_limit; /* assume if the admin and the mcast are the same both can be changed */ + mtu = rdma_mtu_enum_to_int(priv->ca, priv->port, + priv->broadcast->mcmember.mtu); if (priv->mcast_mtu == priv->admin_mtu) - priv->admin_mtu = - priv->mcast_mtu = - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); - else - priv->mcast_mtu = - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); + priv->admin_mtu = IPOIB_UD_MTU(mtu); + priv->mcast_mtu = IPOIB_UD_MTU(mtu); priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6278e4e040fc..641f4751b062 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -462,6 +462,11 @@ enum ib_mtu { IB_MTU_4096 = 5 }; +enum opa_mtu { + OPA_MTU_8192 = 6, + OPA_MTU_10240 = 7 +}; + static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) { switch (mtu) { @@ -488,6 +493,28 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int mtu) return IB_MTU_256; } +static inline int opa_mtu_enum_to_int(enum opa_mtu mtu) +{ + switch (mtu) { + case OPA_MTU_8192: + return 8192; + case OPA_MTU_10240: + return 10240; + default: + return(ib_mtu_enum_to_int((enum ib_mtu)mtu)); + } +} + +static inline enum opa_mtu opa_mtu_int_to_enum(int mtu) +{ + if 
(mtu >= 10240) + return OPA_MTU_10240; + else if (mtu >= 8192) + return OPA_MTU_8192; + else + return ((enum opa_mtu)ib_mtu_int_to_enum(mtu)); +} + enum ib_port_state { IB_PORT_NOP = 0, IB_PORT_DOWN = 1, @@ -651,6 +678,7 @@ struct ib_port_attr { enum ib_port_state state; enum ib_mtu max_mtu; enum ib_mtu active_mtu; + u32 phys_mtu; int gid_tbl_len; unsigned int ip_gids:1; /* This is the value from PortInfo CapabilityMask, defined by IBA */ @@ -3364,6 +3392,55 @@ static inline unsigned int rdma_find_pg_bit(unsigned long addr, return __fls(pgsz); } +/** + * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not. + * @device: Device + * @port_num: 1 based Port number + * + * Return true if port is an Intel OPA port , false if not + */ +static inline bool rdma_core_cap_opa_port(struct ib_device *device, + u32 port_num) +{ + return (device->port_data[port_num].immutable.core_cap_flags & + RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA; +} + +/** + * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value. + * @device: Device + * @port_num: Port number + * @mtu: enum value of MTU + * + * Return the MTU size supported by the port as an integer value. Will return + * -1 if enum value of mtu is not supported. + */ +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, + int mtu) +{ + if (rdma_core_cap_opa_port(device, port)) + return opa_mtu_enum_to_int((enum opa_mtu)mtu); + else + return ib_mtu_enum_to_int((enum ib_mtu)mtu); +} + +/** + * rdma_mtu_from_attr - Return the mtu of the port from the port attribute. + * @device: Device + * @port_num: Port number + * @attr: port attribute + * + * Return the MTU size supported by the port as an integer value. + */ +static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port, + struct ib_port_attr *attr) +{ + if (rdma_core_cap_opa_port(device, port)) + return attr->phys_mtu; + else + return ib_mtu_enum_to_int(attr->max_mtu); +} + int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index bdbfe25d3854..0d9e6d74c385 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2017 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -139,14 +139,6 @@ #define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1) #define OPA_CAP_MASK3_IsVLrSupported (1 << 0) -/** - * new MTU values - */ -enum { - OPA_MTU_8192 = 6, - OPA_MTU_10240 = 7, -}; - enum { OPA_PORT_PHYS_CONF_DISCONNECTED = 0, OPA_PORT_PHYS_CONF_STANDARD = 1, -- cgit v1.2.3 From 0ac8903cbbe618d947b5815d6e0f7b044ee83aa3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 May 2020 10:27:05 +0300 Subject: RDMA/core: Allow the ioctl layer to abort a fully created uobject While creating a uobject every create reaches a point where the uobject is fully initialized. For ioctls that go on to copy_to_user this means they need to open code the destruction of a fully created uobject - ie the RDMA_REMOVE_DESTROY sort of flow. Open coding this creates bugs, eg the CQ does not properly flush the events list when it does its error unwind. 
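For illustration, a hedged sketch of the handler shape this series moves to; the object and attribute names here are placeholders, not real uAPI definitions:

    /* ... HW object fully created, uobj->object set ... */
    uobj->object = obj;
    uverbs_finalize_uobj_create(attrs, EXAMPLE_CREATE_HANDLE);

    /*
     * From this point a failure can simply return; on abort the core
     * now calls the type's destroy_hw() instead of relying on an
     * open-coded unwind in the handler.
     */
    ret = uverbs_copy_to(attrs, EXAMPLE_RESP_ATTR, &obj->id, sizeof(obj->id));
    return ret;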
Provide a uverbs_finalize_uobj_create() function which indicates that the uobject is fully initialized and that abort should call to destroy_hw to destroy the uobj->object and related. Methods can call this function if they go on to have error cases after setting uobj->object. Once done those error cases can simply do return, without an error unwind. Link: https://lore.kernel.org/r/20200519072711.257271-2-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 25 ++++++++++++++++++++----- drivers/infiniband/core/rdma_core.h | 4 ++-- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/uverbs_ioctl.c | 22 ++++++++++++++++++++-- drivers/infiniband/core/uverbs_std_types_cq.c | 8 ++------ drivers/infiniband/core/uverbs_std_types_mr.c | 12 +++--------- drivers/infiniband/hw/mlx5/devx.c | 10 ++++------ drivers/infiniband/hw/mlx5/main.c | 24 ++++++------------------ drivers/infiniband/hw/mlx5/qos.c | 13 +++++-------- include/rdma/ib_verbs.h | 5 +++++ include/rdma/uverbs_ioctl.h | 3 +++ include/rdma/uverbs_std_types.h | 2 +- include/rdma/uverbs_types.h | 3 ++- 13 files changed, 74 insertions(+), 59 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index bf8e149d3191..de3858515275 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -130,6 +130,17 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, lockdep_assert_held(&ufile->hw_destroy_rwsem); assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); + if (reason == RDMA_REMOVE_ABORT_HWOBJ) { + reason = RDMA_REMOVE_ABORT; + ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, + attrs); + /* + * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see + * ib_is_destroy_retryable, cleanup_retryable == false here. + */ + WARN_ON(ret); + } + if (reason == RDMA_REMOVE_ABORT) { WARN_ON(!list_empty(&uobj->list)); WARN_ON(!uobj->context); @@ -647,11 +658,15 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj, * object and anything else connected to uobj before calling this. */ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs, + bool hw_obj_valid) { struct ib_uverbs_file *ufile = uobj->ufile; - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); + uverbs_destroy_uobject(uobj, + hw_obj_valid ? 
RDMA_REMOVE_ABORT_HWOBJ : + RDMA_REMOVE_ABORT, + attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); @@ -921,8 +936,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, } void uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs) + enum uverbs_obj_access access, bool hw_obj_valid, + bool commit, struct uverbs_attr_bundle *attrs) { /* * refcounts should be handled at the object level and not at the @@ -945,7 +960,7 @@ void uverbs_finalize_object(struct ib_uobject *uobj, if (commit) rdma_alloc_commit_uobject(uobj, attrs); else - rdma_alloc_abort_uobject(uobj, attrs); + rdma_alloc_abort_uobject(uobj, attrs, hw_obj_valid); break; default: WARN_ON(true); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 33978e0f1262..2b529233e159 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -64,8 +64,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, s64 id, struct uverbs_attr_bundle *attrs); void uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs); + enum uverbs_obj_access access, bool hw_obj_valid, + bool commit, struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d5642bcf93ee..86c97221872d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -311,7 +311,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) return 0; err_uobj: - rdma_alloc_abort_uobject(uobj, attrs); + rdma_alloc_abort_uobject(uobj, attrs, false); err_ucontext: kfree(attrs->context); attrs->context = NULL; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 538affbc517e..42c5696f03bd 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -58,6 +58,7 @@ struct bundle_priv { DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN); + DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN); /* * Must be last. 
bundle ends in a flex array which overlaps @@ -230,7 +231,8 @@ static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, for (i = 0; i != attr->len; i++) uverbs_finalize_object(attr->uobjects[i], - spec->u2.objs_arr.access, commit, attrs); + spec->u2.objs_arr.access, false, commit, + attrs); } static int uverbs_process_attr(struct bundle_priv *pbundle, @@ -502,7 +504,9 @@ static void bundle_destroy(struct bundle_priv *pbundle, bool commit) uverbs_finalize_object( attr->obj_attr.uobject, - attr->obj_attr.attr_elm->spec.u.obj.access, commit, + attr->obj_attr.attr_elm->spec.u.obj.access, + test_bit(i, pbundle->uobj_hw_obj_valid), + commit, &pbundle->bundle); } @@ -590,6 +594,8 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, sizeof(pbundle->bundle.attr_present)); memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); + memset(pbundle->uobj_hw_obj_valid, 0, + sizeof(pbundle->uobj_hw_obj_valid)); ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); bundle_destroy(pbundle, ret == 0); @@ -784,3 +790,15 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, } return uverbs_copy_to(bundle, idx, from, size); } + +/* Once called an abort will call through to the type's destroy_hw() */ +void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle, + u16 idx) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + + __set_bit(uapi_bkey_attr(uapi_key_attr(idx)), + pbundle->uobj_hw_obj_valid); +} +EXPORT_SYMBOL(uverbs_finalize_uobj_create); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index da4110a0eea2..73fbbeb2586c 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -129,16 +129,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( obj->uevent.uobject.object = cq; obj->uevent.uobject.user_handle = user_handle; rdma_restrack_uadd(&cq->res); + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, sizeof(cq->cqe)); - if (ret) - goto err_cq; + return ret; - return 0; -err_cq: - ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); - cq = NULL; err_free: kfree(cq); err_event_file: diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index c1286a52dc84..a2722ef8496e 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -136,21 +136,15 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( uobj->object = mr; + uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE); + ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey, sizeof(mr->lkey)); if (ret) - goto err_dereg; + return ret; ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY, &mr->rkey, sizeof(mr->rkey)); - if (ret) - goto err_dereg; - - return 0; - -err_dereg: - ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); - return ret; } diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index c339dd5ee694..3047e7d60a9b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2218,14 +2218,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( obj->mdev = dev->mdev; uobj->object = obj; devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, 
&obj->dinlen, &obj_id); - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id)); - if (err) - goto err_umem_destroy; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE); - return 0; + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, + sizeof(obj_id)); + return err; -err_umem_destroy: - mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out)); err_umem_release: ib_umem_release(obj->umem); err_obj_free: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 26f0b39c7f74..623d7898ae6d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6187,26 +6187,20 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)( mmap_offset = mlx5_entry_to_mmap_offset(entry); length = entry->rdma_entry.npages * PAGE_SIZE; uobj->object = entry; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, &mmap_offset, sizeof(mmap_offset)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, &entry->page_idx, sizeof(entry->page_idx)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, &length, sizeof(length)); - if (err) - goto err; - - return 0; - -err: - rdma_user_mmap_entry_remove(&entry->rdma_entry); return err; } @@ -6320,26 +6314,20 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)( mmap_offset = mlx5_entry_to_mmap_offset(entry); length = entry->rdma_entry.npages * PAGE_SIZE; uobj->object = entry; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, &mmap_offset, sizeof(mmap_offset)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, &entry->page_idx, sizeof(entry->page_idx)); if (err) - goto err; + return err; err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, &length, sizeof(length)); - if (err) - goto err; - - return 0; - -err: - rdma_user_mmap_entry_remove(&entry->rdma_entry); return err; } diff --git a/drivers/infiniband/hw/mlx5/qos.c b/drivers/infiniband/hw/mlx5/qos.c index cac878a70edb..dce92554142a 100644 --- a/drivers/infiniband/hw/mlx5/qos.c +++ b/drivers/infiniband/hw/mlx5/qos.c @@ -69,17 +69,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_PP_OBJ_ALLOC)( if (err) goto err; - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX, - &pp_entry->index, sizeof(pp_entry->index)); - if (err) - goto clean; - pp_entry->mdev = dev->mdev; uobj->object = pp_entry; - return 0; + uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE); + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX, + &pp_entry->index, sizeof(pp_entry->index)); + return err; -clean: - mlx5_rl_remove_rate_raw(dev->mdev, pp_entry->index); err: kfree(pp_entry); return err; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 641f4751b062..c988e9205cf9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1491,6 +1491,11 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* + * uobj has been fully created, with the uobj->object set, but is being + * cleaned up before being comitted + */ + RDMA_REMOVE_ABORT_HWOBJ, }; struct ib_rdmacg_object { diff --git a/include/rdma/uverbs_ioctl.h 
b/include/rdma/uverbs_ioctl.h index 9f3b1e004046..5bd2b037e914 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -737,6 +737,9 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) return attr->ptr_attr.len; } +void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx); + /* * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr * attribute. diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 1b28ce1aba07..d6784be27e4b 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -107,7 +107,7 @@ static inline void uobj_put_write(struct ib_uobject *uobj) static inline void uobj_alloc_abort(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { - rdma_alloc_abort_uobject(uobj, attrs); + rdma_alloc_abort_uobject(uobj, attrs, false); } static inline struct ib_uobject * diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index f1cbdae67250..c15b298aa62f 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -139,7 +139,8 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs); void rdma_alloc_abort_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs); + struct uverbs_attr_bundle *attrs, + bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); -- cgit v1.2.3 From 175ba58d62c84e1216cdf8b4f49f79e55e1ed04b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 19 May 2020 10:27:08 +0300 Subject: IB/uverbs: Move QP, SRQ, WQ type and flags to UAPI These constants are going to be used in the ioctl interface in coming patches so they are part of the UAPI, place them in the correct header for clarity. Link: https://lore.kernel.org/r/20200519072711.257271-5-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 43 ++++++++++++++++++--------------- include/uapi/rdma/ib_user_ioctl_verbs.h | 34 ++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 19 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c988e9205cf9..94533ae16697 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1040,9 +1040,9 @@ enum ib_cq_notify_flags { }; enum ib_srq_type { - IB_SRQT_BASIC, - IB_SRQT_XRC, - IB_SRQT_TM, + IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC, + IB_SRQT_XRC = IB_UVERBS_SRQT_XRC, + IB_SRQT_TM = IB_UVERBS_SRQT_TM, }; static inline bool ib_srq_has_cq(enum ib_srq_type srq_type) @@ -1111,16 +1111,16 @@ enum ib_qp_type { IB_QPT_SMI, IB_QPT_GSI, - IB_QPT_RC, - IB_QPT_UC, - IB_QPT_UD, + IB_QPT_RC = IB_UVERBS_QPT_RC, + IB_QPT_UC = IB_UVERBS_QPT_UC, + IB_QPT_UD = IB_UVERBS_QPT_UD, IB_QPT_RAW_IPV6, IB_QPT_RAW_ETHERTYPE, - IB_QPT_RAW_PACKET = 8, - IB_QPT_XRC_INI = 9, - IB_QPT_XRC_TGT, + IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET, + IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI, + IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT, IB_QPT_MAX, - IB_QPT_DRIVER = 0xFF, + IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER, /* Reserve a range for qp types internal to the low level driver. 
* These qp types will not be visible at the IB core layer, so the * IB_QPT_MAX usages should not be affected in the core layer @@ -1139,17 +1139,21 @@ enum ib_qp_type { enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = + IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, IB_QP_CREATE_CROSS_CHANNEL = 1 << 2, IB_QP_CREATE_MANAGED_SEND = 1 << 3, IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_INTEGRITY_EN = 1 << 6, IB_QP_CREATE_NETDEV_USE = 1 << 7, - IB_QP_CREATE_SCATTER_FCS = 1 << 8, - IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, + IB_QP_CREATE_SCATTER_FCS = + IB_UVERBS_QP_CREATE_SCATTER_FCS, + IB_QP_CREATE_CVLAN_STRIPPING = + IB_UVERBS_QP_CREATE_CVLAN_STRIPPING, IB_QP_CREATE_SOURCE_QPN = 1 << 10, - IB_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, + IB_QP_CREATE_PCI_WRITE_END_PADDING = + IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -1654,7 +1658,7 @@ enum ib_raw_packet_caps { }; enum ib_wq_type { - IB_WQT_RQ + IB_WQT_RQ = IB_UVERBS_WQT_RQ, }; enum ib_wq_state { @@ -1677,10 +1681,11 @@ struct ib_wq { }; enum ib_wq_flags { - IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, - IB_WQ_FLAGS_SCATTER_FCS = 1 << 1, - IB_WQ_FLAGS_DELAY_DROP = 1 << 2, - IB_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, + IB_WQ_FLAGS_CVLAN_STRIPPING = IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING, + IB_WQ_FLAGS_SCATTER_FCS = IB_UVERBS_WQ_FLAGS_SCATTER_FCS, + IB_WQ_FLAGS_DELAY_DROP = IB_UVERBS_WQ_FLAGS_DELAY_DROP, + IB_WQ_FLAGS_PCI_WRITE_END_PADDING = + IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING, }; struct ib_wq_init_attr { diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index a640bb814be0..b1662dfe86a6 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -64,6 +64,40 @@ enum ib_uverbs_access_flags { ~(IB_UVERBS_ACCESS_OPTIONAL_FIRST - 1) }; +enum ib_uverbs_srq_type { + IB_UVERBS_SRQT_BASIC, + IB_UVERBS_SRQT_XRC, + IB_UVERBS_SRQT_TM, +}; + +enum ib_uverbs_wq_type { + IB_UVERBS_WQT_RQ, +}; + +enum ib_uverbs_wq_flags { + IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, + IB_UVERBS_WQ_FLAGS_SCATTER_FCS = 1 << 1, + IB_UVERBS_WQ_FLAGS_DELAY_DROP = 1 << 2, + IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING = 1 << 3, +}; + +enum ib_uverbs_qp_type { + IB_UVERBS_QPT_RC = 2, + IB_UVERBS_QPT_UC, + IB_UVERBS_QPT_UD, + IB_UVERBS_QPT_RAW_PACKET = 8, + IB_UVERBS_QPT_XRC_INI, + IB_UVERBS_QPT_XRC_TGT, + IB_UVERBS_QPT_DRIVER = 0xFF, +}; + +enum ib_uverbs_qp_create_flags { + IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_UVERBS_QP_CREATE_SCATTER_FCS = 1 << 8, + IB_UVERBS_QP_CREATE_CVLAN_STRIPPING = 1 << 9, + IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING = 1 << 11, +}; + enum ib_uverbs_query_port_cap_flags { IB_UVERBS_PCF_SM = 1 << 1, IB_UVERBS_PCF_NOTICE_SUP = 1 << 2, -- cgit v1.2.3 From bebcfe85f4338ba1434561a460169a5e0af78f98 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 27 May 2020 09:41:52 -0500 Subject: RDMA/core: Use sizeof_field() helper Make use of the sizeof_field() helper instead of an open-coded version. Link: https://lore.kernel.org/r/20200527144152.GA22605@embeddedor Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 14 +++++++------- drivers/infiniband/core/ud_header.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/uverbs_ioctl.c | 2 +- include/rdma/uverbs_ioctl.h | 12 ++++++------ 5 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 5c878646ff62..a2ed09a3c714 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -190,7 +190,7 @@ static u32 tid; #define PATH_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \ - .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \ + .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { @@ -292,7 +292,7 @@ static const struct ib_field path_rec_table[] = { .struct_offset_bytes = \ offsetof(struct sa_path_rec, field), \ .struct_size_bytes = \ - sizeof((struct sa_path_rec *)0)->field, \ + sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field opa_path_rec_table[] = { @@ -420,7 +420,7 @@ static const struct ib_field opa_path_rec_table[] = { #define MCMEMBER_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \ .field_name = "sa_mcmember_rec:" #field static const struct ib_field mcmember_rec_table[] = { @@ -504,7 +504,7 @@ static const struct ib_field mcmember_rec_table[] = { #define SERVICE_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ - .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_sa_service_rec, field), \ .field_name = "sa_service_rec:" #field static const struct ib_field service_rec_table[] = { @@ -552,7 +552,7 @@ static const struct ib_field service_rec_table[] = { #define CLASSPORTINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ - .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \ .field_name = "ib_class_port_info:" #field static const struct ib_field ib_classport_info_rec_table[] = { @@ -630,7 +630,7 @@ static const struct ib_field ib_classport_info_rec_table[] = { .struct_offset_bytes =\ offsetof(struct opa_class_port_info, field), \ .struct_size_bytes = \ - sizeof((struct opa_class_port_info *)0)->field, \ + sizeof_field(struct opa_class_port_info, field), \ .field_name = "opa_class_port_info:" #field static const struct ib_field opa_classport_info_rec_table[] = { @@ -710,7 +710,7 @@ static const struct ib_field opa_classport_info_rec_table[] = { #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ - .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \ .field_name = "sa_guidinfo_rec:" #field static const struct ib_field guidinfo_rec_table[] = { diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 29a45d2f8898..d65d541b9a25 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -41,7 
+41,7 @@ #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ - .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ + .struct_size_bytes = sizeof_field(struct ib_unpacked_ ## header, field), \ .field_name = #header ":" #field static const struct ib_field lrh_table[] = { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4859ac0df17c..2067a939788b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3741,7 +3741,7 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) #define UAPI_DEF_WRITE_IO(req, resp) \ .write.has_resp = 1 + \ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \ - BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \ + BUILD_BUG_ON_ZERO(sizeof_field(req, response) != \ sizeof(u64)), \ .write.req_size = sizeof(req), .write.resp_size = sizeof(resp) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 42c5696f03bd..2d882c02387c 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -137,7 +137,7 @@ EXPORT_SYMBOL(_uverbs_alloc); static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, u16 len) { - if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data)) + if (uattr->len > sizeof_field(struct ib_uverbs_attr, data)) return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len, uattr->len - len); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 5bd2b037e914..0418d7bddf3e 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -420,9 +420,9 @@ struct uapi_definition { .scope = UAPI_SCOPE_OBJECT, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ - BUILD_BUG_ON_ZERO( \ - sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \ - sizeof(void *)), \ + BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ + ibdev_fn) != \ + sizeof(void *)), \ } /* @@ -435,9 +435,9 @@ struct uapi_definition { .scope = UAPI_SCOPE_METHOD, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ - BUILD_BUG_ON_ZERO( \ - sizeof(((struct ib_device_ops *)0)->ibdev_fn) != \ - sizeof(void *)), \ + BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ + ibdev_fn) != \ + sizeof(void *)), \ } /* Call a function to determine if the entire object is supported or not */ -- cgit v1.2.3 From fef17f91da7d4af11dde1ff832b82d6a64f89562 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:32:59 +0300 Subject: RDMA/cm: Add Enhanced Connection Establishment (ECE) bits Extend REQ (request for communications), REP (reply to request for communication), rejected reason and SIDR_REP (service ID resolution response) structures with hardware vendor ID bits according to IBTA v1.4. 
Link: https://lore.kernel.org/r/20200526103304.196371-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ibta_vol1_c12.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ibta_vol1_c12.h b/include/rdma/ibta_vol1_c12.h index 269904425d3f..960c86bec76c 100644 --- a/include/rdma/ibta_vol1_c12.h +++ b/include/rdma/ibta_vol1_c12.h @@ -38,6 +38,7 @@ /* Table 106 REQ Message Contents */ #define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32) +#define CM_REQ_VENDOR_ID CM_FIELD32_LOC(struct cm_req_msg, 5, 24) #define CM_REQ_SERVICE_ID CM_FIELD64_LOC(struct cm_req_msg, 8) #define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16) #define CM_REQ_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_req_msg, 28, 32) @@ -119,8 +120,11 @@ CM_STRUCT(struct cm_rej_msg, 84 * 8 + 1184); #define CM_REP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 4, 32) #define CM_REP_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_rep_msg, 8, 32) #define CM_REP_LOCAL_QPN CM_FIELD32_LOC(struct cm_rep_msg, 12, 24) +#define CM_REP_VENDOR_ID_H CM_FIELD8_LOC(struct cm_rep_msg, 15, 8) #define CM_REP_LOCAL_EE_CONTEXT_NUMBER CM_FIELD32_LOC(struct cm_rep_msg, 16, 24) +#define CM_REP_VENDOR_ID_M CM_FIELD8_LOC(struct cm_rep_msg, 19, 8) #define CM_REP_STARTING_PSN CM_FIELD32_LOC(struct cm_rep_msg, 20, 24) +#define CM_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_rep_msg, 23, 8) #define CM_REP_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_rep_msg, 24, 8) #define CM_REP_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_rep_msg, 25, 8) #define CM_REP_TARGET_ACK_DELAY CM_FIELD8_LOC(struct cm_rep_msg, 26, 5) @@ -201,7 +205,9 @@ CM_STRUCT(struct cm_sidr_req_msg, 16 * 8 + 1728); #define CM_SIDR_REP_STATUS CM_FIELD8_LOC(struct cm_sidr_rep_msg, 4, 8) #define CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH \ CM_FIELD8_LOC(struct cm_sidr_rep_msg, 5, 8) +#define CM_SIDR_REP_VENDOR_ID_H CM_FIELD16_LOC(struct cm_sidr_rep_msg, 6, 16) #define CM_SIDR_REP_QPN CM_FIELD32_LOC(struct cm_sidr_rep_msg, 8, 24) +#define CM_SIDR_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_sidr_rep_msg, 11, 8) #define CM_SIDR_REP_SERVICEID CM_FIELD64_LOC(struct cm_sidr_rep_msg, 12) #define CM_SIDR_REP_Q_KEY CM_FIELD32_LOC(struct cm_sidr_rep_msg, 20, 32) #define CM_SIDR_REP_ADDITIONAL_INFORMATION \ -- cgit v1.2.3 From 34e2ab57a911f8b32b22580d11a02f0b79108245 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:00 +0300 Subject: RDMA/ucma: Extend ucma_connect to receive ECE parameters Active side of CMID initiates connection through librdmacm's rdma_connect() and kernel's ucma_connect(). Extend UCMA interface to handle those new parameters. Link: https://lore.kernel.org/r/20200526103304.196371-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 21 +++++++++++++++++++++ drivers/infiniband/core/cma_priv.h | 1 + drivers/infiniband/core/ucma.c | 14 +++++++++++--- include/rdma/rdma_cm.h | 3 +++ include/uapi/rdma/rdma_user_cm.h | 6 ++++++ 5 files changed, 42 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 432eec472164..e81b8a523a3e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4036,6 +4036,27 @@ err: } EXPORT_SYMBOL(rdma_connect); +/** + * rdma_connect_ece - Initiate an active connection request with ECE data. + * @id: Connection identifier to connect. 
+ * @conn_param: Connection information used for connected QPs. + * @ece: ECE parameters + * + * See rdma_connect() explanation. + */ +int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + id_priv->ece.vendor_id = ece->vendor_id; + id_priv->ece.attr_mod = ece->attr_mod; + + return rdma_connect(id, conn_param); +} +EXPORT_SYMBOL(rdma_connect_ece); + static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 5edcf44a9307..caece96ebcf5 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -95,6 +95,7 @@ struct rdma_id_private { * Internal to RDMA/core, don't use in the drivers */ struct rdma_restrack_entry res; + struct rdma_ucm_ece ece; }; #if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 06127c800a49..7cbb63690241 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1072,12 +1072,15 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id, static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { - struct rdma_ucm_connect cmd; struct rdma_conn_param conn_param; + struct rdma_ucm_ece ece = {}; + struct rdma_ucm_connect cmd; struct ucma_context *ctx; + size_t in_size; int ret; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + in_size = min_t(size_t, in_len, sizeof(cmd)); + if (copy_from_user(&cmd, inbuf, in_size)) return -EFAULT; if (!cmd.conn_param.valid) @@ -1088,8 +1091,13 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); + if (offsetofend(typeof(cmd), ece) <= in_size) { + ece.vendor_id = cmd.ece.vendor_id; + ece.attr_mod = cmd.ece.attr_mod; + } + mutex_lock(&ctx->mutex); - ret = rdma_connect(ctx->cm_id, &conn_param); + ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index ea8e794785ed..4e2975eb3643 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -264,6 +264,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, */ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); +int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece); + /** * rdma_listen - This function is called by the passive side to * listen for incoming connection requests. 
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 1bb6e75d254b..c1409dd7225f 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -210,10 +210,16 @@ struct rdma_ucm_ud_param { __u8 reserved[7]; }; +struct rdma_ucm_ece { + __u32 vendor_id; + __u32 attr_mod; +}; + struct rdma_ucm_connect { struct rdma_ucm_conn_param conn_param; __u32 id; __u32 reserved; + struct rdma_ucm_ece ece; }; struct rdma_ucm_listen { -- cgit v1.2.3 From 93531ee7b9d1313227d2b4f354989895e8d57b72 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:01 +0300 Subject: RDMA/ucma: Deliver ECE parameters through UCMA events Passive side of CMID connection receives ECE request through REQ message and needs to respond with relevant REP message which will be forwarded to active side. The UCMA events interface is responsible for such communication with the user space (librdmacm). Extend it to provide ECE wire data. Link: https://lore.kernel.org/r/20200526103304.196371-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 6 +++++- include/rdma/rdma_cm.h | 1 + include/uapi/rdma/rdma_user_cm.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 7cbb63690241..3e5268cfa164 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -360,6 +360,9 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); + uevent->resp.ece.vendor_id = event->ece.vendor_id; + uevent->resp.ece.attr_mod = event->ece.attr_mod; + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -ENOMEM; @@ -404,7 +407,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, * Old 32 bit user space does not send the 4 byte padding in the * reserved field. We don't care, allow it to keep working. */ - if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved)) + if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) - + sizeof(uevent->resp.ece)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 4e2975eb3643..418590c9a9e8 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -111,6 +111,7 @@ struct rdma_cm_event { struct rdma_conn_param conn; struct rdma_ud_param ud; } param; + struct rdma_ucm_ece ece; }; struct rdma_cm_id; diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index c1409dd7225f..19c5c3f74af9 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -297,6 +297,7 @@ struct rdma_ucm_event_resp { struct rdma_ucm_ud_param ud; } param; __u32 reserved; + struct rdma_ucm_ece ece; }; /* Option levels */ -- cgit v1.2.3 From a20652e175f2c5cea74c90503eeaeafabd08abed Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:02 +0300 Subject: RDMA/cm: Send and receive ECE parameter over the wire ECE parameters are exchanged through REQ->REP/SIDR_REP messages, this patch adds the data to provide to other side of CMID communication channel. 
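Concretely, the REQ carries the 24-bit ECE vendor ID in a single field while the REP spreads it over three one-byte fields, with attr_mod riding in the MAD header. A hedged sketch of the REP-side packing follows; pack_rep_ece() is an invented name for illustration, while the field macros and IBA_SET() come from ibta_vol1_c12.h and the hunks below.

/* Illustration only: where the ECE data lands in a REP MAD. */
static void pack_rep_ece(struct cm_rep_msg *rep_msg, struct ib_mad_hdr *hdr,
			 const struct rdma_ucm_ece *ece)
{
	/* The 32-bit attr_mod travels in the MAD header attribute modifier. */
	hdr->attr_mod = cpu_to_be32(ece->attr_mod);

	/* The 24-bit vendor ID is split across three one-byte REP fields. */
	IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, ece->vendor_id & 0xff);
	IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, (ece->vendor_id >> 8) & 0xff);
	IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, (ece->vendor_id >> 16) & 0xff);
}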
Link: https://lore.kernel.org/r/20200526103304.196371-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 39 ++++++++++++++++++++++++++++++++++----- drivers/infiniband/core/cma.c | 8 ++++++++ include/rdma/ib_cm.h | 9 ++++++++- 3 files changed, 50 insertions(+), 6 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f38ff46abe8f..085c146fe400 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -66,6 +66,8 @@ static const char * const ibcm_rej_reason_strs[] = { [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version", [IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label", [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label", + [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] = + "vendor option is not supported", }; const char *__attribute_const__ ibcm_reject_msg(int reason) @@ -290,6 +292,8 @@ struct cm_id_private { struct list_head work_list; atomic_t work_count; + + struct rdma_ucm_ece ece; }; static void cm_work_handler(struct work_struct *work); @@ -1318,6 +1322,13 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, hdr->tid = tid; } +static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id, + __be64 tid, u32 attr_mod) +{ + cm_format_mad_hdr(hdr, attr_id, tid); + hdr->attr_mod = cpu_to_be32(attr_mod); +} + static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) @@ -1330,8 +1341,8 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_ext = opa_is_extended_lid(pri_path->opa.dlid, pri_path->opa.slid); - cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, - cm_form_tid(cm_id_priv)); + cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, + cm_form_tid(cm_id_priv), param->ece.attr_mod); IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg, be32_to_cpu(cm_id_priv->id.local_id)); @@ -1454,6 +1465,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alt_path->packet_life_time)); } + IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id); if (param->private_data && param->private_data_len) IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data, @@ -1810,6 +1822,9 @@ static void cm_format_req_event(struct cm_work *work, param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg); param->srq = IBA_GET(CM_REQ_SRQ, req_msg); param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr; + param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg); + param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod); + work->cm_event.private_data = IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg); } @@ -2202,7 +2217,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_rep_param *param) { - cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); + cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid, + param->ece.attr_mod); IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg, be32_to_cpu(cm_id_priv->id.local_id)); IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg, @@ -2229,6 +2245,10 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg, IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num); } + IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id); + IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8); + IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16); + if (param->private_data && param->private_data_len) 
IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data, param->private_data_len); @@ -2376,6 +2396,11 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg); param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg); param->srq = IBA_GET(CM_REP_SRQ, rep_msg); + param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16; + param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8; + param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg); + param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod); + work->cm_event.private_data = IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg); } @@ -3597,8 +3622,8 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { - cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, - cm_id_priv->tid); + cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, + cm_id_priv->tid, param->ece.attr_mod); IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg, be32_to_cpu(cm_id_priv->id.remote_id)); IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status); @@ -3606,6 +3631,10 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg, be64_to_cpu(cm_id_priv->id.service_id)); IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey); + IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg, + param->ece.vendor_id & 0xFF); + IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg, + (param->ece.vendor_id >> 8) & 0xFF); if (param->info && param->info_length) IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e81b8a523a3e..f554a371f4fa 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1911,6 +1911,9 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event, event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; event->param.conn.srq = rep_data->srq; event->param.conn.qp_num = rep_data->remote_qpn; + + event->ece.vendor_id = rep_data->ece.vendor_id; + event->ece.attr_mod = rep_data->ece.attr_mod; } static int cma_cm_event_handler(struct rdma_id_private *id_priv, @@ -2129,6 +2132,9 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, event->param.conn.rnr_retry_count = req_data->rnr_retry_count; event->param.conn.srq = req_data->srq; event->param.conn.qp_num = req_data->remote_qpn; + + event->ece.vendor_id = req_data->ece.vendor_id; + event->ece.attr_mod = req_data->ece.attr_mod; } static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id, @@ -3947,6 +3953,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 
1 : 0; + req.ece.vendor_id = id_priv->ece.vendor_id; + req.ece.attr_mod = id_priv->ece.attr_mod; trace_cm_send_req(id_priv); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 058cfbc2b37f..0f1ea5f2d01c 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -11,6 +11,7 @@ #include #include +#include /* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ extern struct class cm_class; @@ -115,6 +116,7 @@ struct ib_cm_req_event_param { unsigned int retry_count:3; unsigned int rnr_retry_count:3; unsigned int srq:1; + struct rdma_ucm_ece ece; }; struct ib_cm_rep_event_param { @@ -129,6 +131,7 @@ struct ib_cm_rep_event_param { unsigned int flow_control:1; unsigned int rnr_retry_count:3; unsigned int srq:1; + struct rdma_ucm_ece ece; }; enum ib_cm_rej_reason { @@ -164,7 +167,8 @@ enum ib_cm_rej_reason { IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, IB_CM_REJ_INVALID_CLASS_VERSION = 31, IB_CM_REJ_INVALID_FLOW_LABEL = 32, - IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 + IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33, + IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED = 35, }; struct ib_cm_rej_event_param { @@ -369,6 +373,7 @@ struct ib_cm_req_param { u8 rnr_retry_count; u8 max_cm_retries; u8 srq; + struct rdma_ucm_ece ece; }; /** @@ -392,6 +397,7 @@ struct ib_cm_rep_param { u8 flow_control; u8 rnr_retry_count; u8 srq; + struct rdma_ucm_ece ece; }; /** @@ -546,6 +552,7 @@ struct ib_cm_sidr_rep_param { u8 info_length; const void *private_data; u8 private_data_len; + struct rdma_ucm_ece ece; }; /** -- cgit v1.2.3 From 0cb15372a615a9835893f43e86ae45399eb63996 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:03 +0300 Subject: RDMA/cma: Connect ECE to rdma_accept The rdma_accept() is called by both passive and active sides of CMID connection to mark readiness to start data transfer. For passive side, this is called explicitly, for active side, it is called implicitly while receiving REP message. Provide ECE data to rdma_accept function needed for passive side to send that REP message. Link: https://lore.kernel.org/r/20200526103304.196371-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 19 +++++++++++++++++++ drivers/infiniband/core/ucma.c | 14 +++++++++++--- include/rdma/rdma_cm.h | 3 +++ include/uapi/rdma/rdma_user_cm.h | 1 + 4 files changed, 34 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f554a371f4fa..d449afe5557b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4090,6 +4090,8 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 
1 : 0; + rep.ece.vendor_id = id_priv->ece.vendor_id; + rep.ece.attr_mod = id_priv->ece.attr_mod; trace_cm_send_rep(id_priv); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); @@ -4137,7 +4139,11 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, return ret; rep.qp_num = id_priv->qp_num; rep.qkey = id_priv->qkey; + + rep.ece.vendor_id = id_priv->ece.vendor_id; + rep.ece.attr_mod = id_priv->ece.attr_mod; } + rep.private_data = private_data; rep.private_data_len = private_data_len; @@ -4195,6 +4201,19 @@ reject: } EXPORT_SYMBOL(__rdma_accept); +int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller, struct rdma_ucm_ece *ece) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + id_priv->ece.vendor_id = ece->vendor_id; + id_priv->ece.attr_mod = ece->attr_mod; + + return __rdma_accept(id, conn_param, caller); +} +EXPORT_SYMBOL(__rdma_accept_ece); + int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { struct rdma_id_private *id_priv; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3e5268cfa164..6b27b210b890 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1135,28 +1135,36 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, { struct rdma_ucm_accept cmd; struct rdma_conn_param conn_param; + struct rdma_ucm_ece ece = {}; struct ucma_context *ctx; + size_t in_size; int ret; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + in_size = min_t(size_t, in_len, sizeof(cmd)); + if (copy_from_user(&cmd, inbuf, in_size)) return -EFAULT; ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); + if (offsetofend(typeof(cmd), ece) <= in_size) { + ece.vendor_id = cmd.ece.vendor_id; + ece.attr_mod = cmd.ece.attr_mod; + } + if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); mutex_lock(&ctx->mutex); - ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); + ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece); mutex_unlock(&ctx->mutex); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); } else { mutex_lock(&ctx->mutex); - ret = __rdma_accept(ctx->cm_id, NULL, NULL); + ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece); mutex_unlock(&ctx->mutex); } ucma_put_ctx(ctx); diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 418590c9a9e8..7ac91677660f 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -280,6 +280,9 @@ int rdma_listen(struct rdma_cm_id *id, int backlog); int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, const char *caller); +int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + const char *caller, struct rdma_ucm_ece *ece); + /** * rdma_accept - Called to accept a connection request or response. * @id: Connection identifier associated with the request. 
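For the passive side, a hedged sketch of a connection-request handler that consumes the ECE proposal delivered in the event and answers through the new __rdma_accept_ece() entry point declared above; the handler name and the echo-back policy are placeholders for this illustration.

/* Sketch only: accept a request and return ECE data for the REP. */
static int example_cm_handler(struct rdma_cm_id *id,
			      struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param = {};
	struct rdma_ucm_ece ece;

	if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 0;

	/* Placeholder policy: echo the peer's proposed options back. */
	ece = event->ece;

	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;

	/* The caller string feeds resource tracking, as in the rdma_accept()
	 * wrapper; a non-zero return lets the core tear the new id down. */
	return __rdma_accept_ece(id, &conn_param, KBUILD_MODNAME, &ece);
}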
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 19c5c3f74af9..6b883dde7064 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -232,6 +232,7 @@ struct rdma_ucm_accept { struct rdma_ucm_conn_param conn_param; __u32 id; __u32 reserved; + struct rdma_ucm_ece ece; }; struct rdma_ucm_reject { -- cgit v1.2.3 From 8094ba0ace7f6cd1e31ea8b151fba3594cadfa9a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 26 May 2020 13:33:04 +0300 Subject: RDMA/cma: Provide ECE reject reason IBTA declares "vendor option not supported" reject reason in REJ messages if passive side doesn't want to accept proposed ECE options. Due to the fact that ECE is managed by userspace, there is a need to let users to provide such rejected reason. Link: https://lore.kernel.org/r/20200526103304.196371-7-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 9 ++++----- drivers/infiniband/core/ucma.c | 15 ++++++++++++++- drivers/infiniband/ulp/isert/ib_isert.c | 5 +++-- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 3 ++- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++- drivers/nvme/target/rdma.c | 4 +++- include/rdma/rdma_cm.h | 2 +- include/uapi/rdma/rdma_user_cm.h | 3 ++- net/rds/ib_cm.c | 4 +++- 9 files changed, 34 insertions(+), 14 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d449afe5557b..8026ee56546a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4196,7 +4196,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, return 0; reject: cma_modify_qp_err(id_priv); - rdma_reject(id, NULL, 0); + rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); return ret; } EXPORT_SYMBOL(__rdma_accept); @@ -4236,7 +4236,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) EXPORT_SYMBOL(rdma_notify); int rdma_reject(struct rdma_cm_id *id, const void *private_data, - u8 private_data_len) + u8 private_data_len, u8 reason) { struct rdma_id_private *id_priv; int ret; @@ -4251,9 +4251,8 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, private_data, private_data_len); } else { trace_cm_send_rej(id_priv); - ret = ib_send_cm_rej(id_priv->cm_id.ib, - IB_CM_REJ_CONSUMER_DEFINED, NULL, - 0, private_data, private_data_len); + ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0, + private_data, private_data_len); } } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 6b27b210b890..5b87eee8ccc8 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include "core_priv.h" @@ -1181,12 +1182,24 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + if (!cmd.reason) + cmd.reason = IB_CM_REJ_CONSUMER_DEFINED; + + switch (cmd.reason) { + case IB_CM_REJ_CONSUMER_DEFINED: + case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED: + break; + default: + return -EINVAL; + } + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->mutex); - ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len, + cmd.reason); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); 
return ret; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a1a035270cab..b7df38ee8ae0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -502,7 +503,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) if (!np->enabled) { spin_unlock_bh(&np->np_thread_lock); isert_dbg("iscsi_np is not enabled, reject connect request\n"); - return rdma_reject(cma_id, NULL, 0); + return rdma_reject(cma_id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); } spin_unlock_bh(&np->np_thread_lock); @@ -553,7 +554,7 @@ out_rsp_dma_map: isert_free_login_buf(isert_conn); out: kfree(isert_conn); - rdma_reject(cma_id, NULL, 0); + rdma_reject(cma_id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); return ret; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 5ef8988ee75b..0d9241f5d9e6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -15,6 +15,7 @@ #include "rtrs-srv.h" #include "rtrs-log.h" +#include MODULE_DESCRIPTION("RDMA Transport Server"); MODULE_LICENSE("GPL"); @@ -1576,7 +1577,7 @@ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno) .errno = cpu_to_le16(errno), }; - err = rdma_reject(cm_id, &msg, sizeof(msg)); + err = rdma_reject(cm_id, &msg, sizeof(msg), IB_CM_REJ_CONSUMER_DEFINED); if (err) pr_err("rdma_reject(), err: %d\n", err); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a294630f2100..cdc8c239d6c0 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2497,7 +2497,8 @@ reject: SRP_BUF_FORMAT_INDIRECT); if (rdma_cm_id) - rdma_reject(rdma_cm_id, rej, sizeof(*rej)); + rdma_reject(rdma_cm_id, rej, sizeof(*rej), + IB_CM_REJ_CONSUMER_DEFINED); else ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, rej, sizeof(*rej)); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index fd47de0e4e4e..55aaf03a9580 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "nvmet.h" @@ -1138,7 +1139,8 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id, rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); rej.sts = cpu_to_le16(status); - return rdma_reject(cm_id, (void *)&rej, sizeof(rej)); + return rdma_reject(cm_id, (void *)&rej, sizeof(rej), + IB_CM_REJ_CONSUMER_DEFINED); } static struct nvmet_rdma_queue * diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 7ac91677660f..939d7abe026f 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -320,7 +320,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event); * rdma_reject - Called to reject a connection request or response. 
*/ int rdma_reject(struct rdma_cm_id *id, const void *private_data, - u8 private_data_len); + u8 private_data_len, u8 reason); /** * rdma_disconnect - This function disconnects the associated QP and diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 6b883dde7064..ed5a514305c1 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -238,7 +238,8 @@ struct rdma_ucm_accept { struct rdma_ucm_reject { __u32 id; __u8 private_data_len; - __u8 reserved[3]; + __u8 reason; + __u8 reserved[2]; __u8 private_data[RDMA_MAX_PRIVATE_DATA]; }; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index c71f4328d138..0fec4171564e 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "rds_single_path.h" #include "rds.h" @@ -927,7 +928,8 @@ out: if (conn) mutex_unlock(&conn->c_cm_lock); if (err) - rdma_reject(cm_id, &err, sizeof(int)); + rdma_reject(cm_id, &err, sizeof(int), + IB_CM_REJ_CONSUMER_DEFINED); return destroy; } -- cgit v1.2.3 From ffd7339a2fac98b9ff731e336c4411bf1ce57e22 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 27 May 2020 14:18:45 -0300 Subject: RDMA/core: Use offsetofend() instead of open coding No reason to open code this. Link: https://lore.kernel.org/r/0-v1-0bc346e08476+585-drop_offsetofend_jgg@mellanox.com Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_ioctl.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 0418d7bddf3e..86de10ea30af 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -491,8 +491,7 @@ struct uapi_definition { */ #define UVERBS_ATTR_STRUCT(_type, _last) \ .zero_trailing = 1, \ - UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \ - sizeof(_type)) + UVERBS_ATTR_SIZE(offsetofend(_type, _last), sizeof(_type)) /* * Specifies at least min_len bytes must be passed in, but the amount can be * larger, up to the protocol maximum size. No check for zeroing is done. -- cgit v1.2.3 From 3446cbd2d523fdaf37f3772082071d1154c419d9 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Wed, 27 May 2020 11:34:52 +0300 Subject: RDMA/core: Add protection for shared CQs used by ULPs A pre-step for adding shared CQs. Add the infrastructure to prevent shared CQ users from altering the CQ configurations. For now all cqs are marked as private (non-shared). The core driver should use the new force functions to perform resize/destroy/moderation changes that are not allowed for users of shared CQs. Link: https://lore.kernel.org/r/1590568495-101621-2-git-send-email-yaminf@mellanox.com Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Reviewed-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 9 +++++++++ include/rdma/ib_verbs.h | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e2c9430a3ff1..21815e125e98 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2005,6 +2005,9 @@ EXPORT_SYMBOL(__ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { + if (cq->shared) + return -EOPNOTSUPP; + return cq->device->ops.modify_cq ? 
cq->device->ops.modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP; @@ -2013,6 +2016,9 @@ EXPORT_SYMBOL(rdma_set_cq_moderation); int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { + if (WARN_ON_ONCE(cq->shared)) + return -EOPNOTSUPP; + if (atomic_read(&cq->usecnt)) return -EBUSY; @@ -2025,6 +2031,9 @@ EXPORT_SYMBOL(ib_destroy_cq_user); int ib_resize_cq(struct ib_cq *cq, int cqe) { + if (cq->shared) + return -EOPNOTSUPP; + return cq->device->ops.resize_cq ? cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 94533ae16697..cc515025cbdb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1613,7 +1613,8 @@ struct ib_cq { /* updated only by trace points */ ktime_t timestamp; - bool interrupt; + u8 interrupt:1; + u8 shared:1; /* * Implementation details of the RDMA core, don't use in drivers: @@ -3909,6 +3910,8 @@ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, * ib_free_cq_user - Free kernel/user CQ * @cq: The CQ to free * @udata: Valid user data or NULL for kernel objects + * + * NOTE: This function shouldn't be called on shared CQs. */ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata); -- cgit v1.2.3 From c7ff819aefea04944dfcec5f0731b97277df6a9c Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Wed, 27 May 2020 11:34:53 +0300 Subject: RDMA/core: Introduce shared CQ pool API Allow a ULP to ask the core to provide a completion queue based on a least-used search on a per-device CQ pools. The device CQ pools grow in a lazy fashion when more CQs are requested. This feature reduces the amount of interrupts when using many QPs. Using shared CQs allows for more effcient completion handling. It also reduces the amount of overhead needed for CQ contexts. Test setup: Intel(R) Xeon(R) Platinum 8176M CPU @ 2.10GHz servers. Running NVMeoF 4KB read IOs over ConnectX-5EX across Spectrum switch. TX-depth = 32. The patch was applied in the nvme driver on both the target and initiator. Four controllers are accessed from each core. In the current test case we have exposed sixteen NVMe namespaces using four different subsystems (four namespaces per subsystem) from one NVM port. Each controller allocated X queues (RDMA QPs) and attached to Y CQs. Before this series we had X == Y, i.e for four controllers we've created total of 4X QPs and 4X CQs. In the shared case, we've created 4X QPs and only X CQs which means that we have four controllers that share a completion queue per core. Until fourteen cores there is no significant change in performance and the number of interrupts per second is less than a million in the current case. 
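Before the benchmark tables, a hedged sketch of the intended ULP call pattern for the pool API added below; only ib_cq_pool_get() and ib_cq_pool_put() come from the patch, the surrounding function and variable names are illustrative.

/* Sketch only: claim a pooled CQ for a queue, release it on teardown. */
static struct ib_cq *example_get_queue_cq(struct ib_device *dev,
					  unsigned int queue_size)
{
	/* A hint of -1 lets the core spread users across completion
	 * vectors; IB_POLL_DIRECT is not allowed for pooled CQs.
	 * Returns ERR_PTR() on failure. */
	return ib_cq_pool_get(dev, queue_size, -1, IB_POLL_WORKQUEUE);
}

static void example_put_queue_cq(struct ib_cq *cq, unsigned int queue_size)
{
	/* Return exactly the entries that were claimed; the CQ itself
	 * stays in the device pool for the next user. */
	ib_cq_pool_put(cq, queue_size);
}

Each pooled CQ is allocated with at least the requested number of entries (padded up to IB_MAX_SHARED_CQ_SZ, capped by the device maximum) and accounts cqe_used, so callers must put back exactly the nr_cqe they claimed.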
================================================== |Cores|Current KIOPs |Shared KIOPs |improvement| |-----|---------------|--------------|-----------| |14 |2332 |2723 |16.7% | |-----|---------------|--------------|-----------| |20 |2086 |2712 |30% | |-----|---------------|--------------|-----------| |28 |1971 |2669 |35.4% | |================================================= |Cores|Current avg lat|Shared avg lat|improvement| |-----|---------------|--------------|-----------| |14 |767us |657us |14.3% | |-----|---------------|--------------|-----------| |20 |1225us |943us |23% | |-----|---------------|--------------|-----------| |28 |1816us |1341us |26.1% | ======================================================== |Cores|Current interrupts|Shared interrupts|improvement| |-----|------------------|-----------------|-----------| |14 |1.6M/sec |0.4M/sec |72% | |-----|------------------|-----------------|-----------| |20 |2.8M/sec |0.6M/sec |72.4% | |-----|------------------|-----------------|-----------| |28 |2.9M/sec |0.8M/sec |63.4% | ==================================================================== |Cores|Current 99.99th PCTL lat|Shared 99.99th PCTL lat|improvement| |-----|------------------------|-----------------------|-----------| |14 |67ms |6ms |90.9% | |-----|------------------------|-----------------------|-----------| |20 |5ms |6ms |-10% | |-----|------------------------|-----------------------|-----------| |28 |8.7ms |6ms |25.9% | |=================================================================== Performance improvement with sixteen disks (sixteen CQs per core) is comparable. Link: https://lore.kernel.org/r/1590568495-101621-3-git-send-email-yaminf@mellanox.com Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Reviewed-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 3 + drivers/infiniband/core/cq.c | 173 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/device.c | 2 + include/rdma/ib_verbs.h | 17 +++- 4 files changed, 194 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index cf42acca4a3a..a1e6a67b2c4a 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -414,4 +414,7 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv, struct vm_area_struct *vma, struct rdma_user_mmap_entry *entry); +void ib_cq_pool_init(struct ib_device *dev); +void ib_cq_pool_destroy(struct ib_device *dev); + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 4f25b2400694..655795bfa0ee 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -7,7 +7,11 @@ #include #include +#include "core_priv.h" + #include +/* Max size for shared CQ, may require tuning */ +#define IB_MAX_SHARED_CQ_SZ 4096U /* # of WCs to poll for with a single call to ib_poll_cq */ #define IB_POLL_BATCH 16 @@ -218,6 +222,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, cq->cq_context = private; cq->poll_ctx = poll_ctx; atomic_set(&cq->usecnt, 0); + cq->comp_vector = comp_vector; cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL); if (!cq->wc) @@ -309,6 +314,8 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) { if (WARN_ON_ONCE(atomic_read(&cq->usecnt))) return; + if (WARN_ON_ONCE(cq->cqe_used)) + return; switch (cq->poll_ctx) { case IB_POLL_DIRECT: @@ -334,3 +341,169 @@ void 
ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) kfree(cq); } EXPORT_SYMBOL(ib_free_cq_user); + +void ib_cq_pool_init(struct ib_device *dev) +{ + unsigned int i; + + spin_lock_init(&dev->cq_pools_lock); + for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) + INIT_LIST_HEAD(&dev->cq_pools[i]); +} + +void ib_cq_pool_destroy(struct ib_device *dev) +{ + struct ib_cq *cq, *n; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) { + list_for_each_entry_safe(cq, n, &dev->cq_pools[i], + pool_entry) { + WARN_ON(cq->cqe_used); + cq->shared = false; + ib_free_cq(cq); + } + } +} + +static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes, + enum ib_poll_context poll_ctx) +{ + LIST_HEAD(tmp_list); + unsigned int nr_cqs, i; + struct ib_cq *cq; + int ret; + + if (poll_ctx > IB_POLL_LAST_POOL_TYPE) { + WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE); + return -EINVAL; + } + + /* + * Allocate at least as many CQEs as requested, and otherwise + * a reasonable batch size so that we can share CQs between + * multiple users instead of allocating a larger number of CQs. + */ + nr_cqes = min_t(unsigned int, dev->attrs.max_cqe, + max(nr_cqes, IB_MAX_SHARED_CQ_SZ)); + nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus()); + for (i = 0; i < nr_cqs; i++) { + cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto out_free_cqs; + } + cq->shared = true; + list_add_tail(&cq->pool_entry, &tmp_list); + } + + spin_lock_irq(&dev->cq_pools_lock); + list_splice(&tmp_list, &dev->cq_pools[poll_ctx]); + spin_unlock_irq(&dev->cq_pools_lock); + + return 0; + +out_free_cqs: + list_for_each_entry(cq, &tmp_list, pool_entry) { + cq->shared = false; + ib_free_cq(cq); + } + return ret; +} + +/** + * ib_cq_pool_get() - Find the least used completion queue that matches + * a given cpu hint (or least used for wild card affinity) and fits + * nr_cqe. + * @dev: rdma device + * @nr_cqe: number of needed cqe entries + * @comp_vector_hint: completion vector hint (-1) for the driver to assign + * a comp vector based on internal counter + * @poll_ctx: cq polling context + * + * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and + * claim entries in it for us. In case there is no available cq, allocate + * a new cq with the requirements and add it to the device pool. + * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value + * for @poll_ctx. 
+ */ +struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, + int comp_vector_hint, + enum ib_poll_context poll_ctx) +{ + static unsigned int default_comp_vector; + unsigned int vector, num_comp_vectors; + struct ib_cq *cq, *found = NULL; + int ret; + + if (poll_ctx > IB_POLL_LAST_POOL_TYPE) { + WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE); + return ERR_PTR(-EINVAL); + } + + num_comp_vectors = + min_t(unsigned int, dev->num_comp_vectors, num_online_cpus()); + /* Project the affinty to the device completion vector range */ + if (comp_vector_hint < 0) { + comp_vector_hint = + (READ_ONCE(default_comp_vector) + 1) % num_comp_vectors; + WRITE_ONCE(default_comp_vector, comp_vector_hint); + } + vector = comp_vector_hint % num_comp_vectors; + + /* + * Find the least used CQ with correct affinity and + * enough free CQ entries + */ + while (!found) { + spin_lock_irq(&dev->cq_pools_lock); + list_for_each_entry(cq, &dev->cq_pools[poll_ctx], + pool_entry) { + /* + * Check to see if we have found a CQ with the + * correct completion vector + */ + if (vector != cq->comp_vector) + continue; + if (cq->cqe_used + nr_cqe > cq->cqe) + continue; + found = cq; + break; + } + + if (found) { + found->cqe_used += nr_cqe; + spin_unlock_irq(&dev->cq_pools_lock); + + return found; + } + spin_unlock_irq(&dev->cq_pools_lock); + + /* + * Didn't find a match or ran out of CQs in the device + * pool, allocate a new array of CQs. + */ + ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx); + if (ret) + return ERR_PTR(ret); + } + + return found; +} +EXPORT_SYMBOL(ib_cq_pool_get); + +/** + * ib_cq_pool_put - Return a CQ taken from a shared pool. + * @cq: The CQ to return. + * @nr_cqe: The max number of cqes that the user had requested. + */ +void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe) +{ + if (WARN_ON_ONCE(nr_cqe > cq->cqe_used)) + return; + + spin_lock_irq(&cq->device->cq_pools_lock); + cq->cqe_used -= nr_cqe; + spin_unlock_irq(&cq->device->cq_pools_lock); +} +EXPORT_SYMBOL(ib_cq_pool_put); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d9f565a779df..53f541f41ff3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1393,6 +1393,7 @@ int ib_register_device(struct ib_device *device, const char *name) goto dev_cleanup; } + ib_cq_pool_init(device); ret = enable_device_and_get(device); dev_set_uevent_suppress(&device->dev, false); /* Mark for userspace that device is ready */ @@ -1447,6 +1448,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); + ib_cq_pool_destroy(ib_dev); /* Expedite removing unregistered pointers from the hash table */ free_netdevs(ib_dev); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cc515025cbdb..19864da78649 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1588,10 +1588,12 @@ struct ib_ah { typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); enum ib_poll_context { - IB_POLL_DIRECT, /* caller context, no hw completions */ IB_POLL_SOFTIRQ, /* poll from softirq context */ IB_POLL_WORKQUEUE, /* poll from workqueue */ IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */ + IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE, + + IB_POLL_DIRECT, /* caller context, no hw completions */ }; struct ib_cq { @@ -1601,9 +1603,11 @@ struct ib_cq { void (*event_handler)(struct ib_event *, void *); void *cq_context; int cqe; + unsigned int cqe_used; atomic_t usecnt; /* count number of work queues */ enum 
ib_poll_context poll_ctx; struct ib_wc *wc; + struct list_head pool_entry; union { struct irq_poll iop; struct work_struct work; @@ -1615,6 +1619,7 @@ struct ib_cq { ktime_t timestamp; u8 interrupt:1; u8 shared:1; + unsigned int comp_vector; /* * Implementation details of the RDMA core, don't use in drivers: @@ -2734,6 +2739,10 @@ struct ib_device { #endif u32 index; + + spinlock_t cq_pools_lock; + struct list_head cq_pools[IB_POLL_LAST_POOL_TYPE + 1]; + struct rdma_restrack_root *res; const struct uapi_definition *driver_def; @@ -4037,6 +4046,12 @@ static inline int ib_req_notify_cq(struct ib_cq *cq, return cq->device->ops.req_notify_cq(cq, flags); } +struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe, + int comp_vector_hint, + enum ib_poll_context poll_ctx); + +void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe); + /** * ib_req_ncomp_notif - Request completion notification when there are * at least the specified number of unreaped completions on the CQ. -- cgit v1.2.3 From 4e373d5417ecbb4f438a8500f0379a2fc29c2643 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:46 -0300 Subject: RDMA/core: Remove FMR pool API This ancient and unsafe method for memory registration is no longer used by any RDMA based ULP. Remove the FMR pool API from the core driver. Link: https://lore.kernel.org/r/4-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- Documentation/driver-api/infiniband.rst | 3 - drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/fmr_pool.c | 494 -------------------------------- include/rdma/ib_fmr_pool.h | 93 ------ 4 files changed, 1 insertion(+), 591 deletions(-) delete mode 100644 drivers/infiniband/core/fmr_pool.c delete mode 100644 include/rdma/ib_fmr_pool.h (limited to 'include/rdma') diff --git a/Documentation/driver-api/infiniband.rst b/Documentation/driver-api/infiniband.rst index 1a3116f32ff0..30e142ccbee9 100644 --- a/Documentation/driver-api/infiniband.rst +++ b/Documentation/driver-api/infiniband.rst @@ -37,9 +37,6 @@ InfiniBand core interfaces .. kernel-doc:: drivers/infiniband/core/ud_header.c :export: -.. kernel-doc:: drivers/infiniband/core/fmr_pool.c - :export: - .. kernel-doc:: drivers/infiniband/core/umem.c :export: diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 63c1591223ac..24cb71a16a28 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y) ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ - device.o fmr_pool.o cache.o netlink.o \ + device.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ nldev.o restrack.o counters.o ib_core_uverbs.o \ diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c deleted file mode 100644 index e08aec427027..000000000000 --- a/drivers/infiniband/core/fmr_pool.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "core_priv.h" - -#define PFX "fmr_pool: " - -enum { - IB_FMR_MAX_REMAPS = 32, - - IB_FMR_HASH_BITS = 8, - IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS, - IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1 -}; - -/* - * If an FMR is not in use, then the list member will point to either - * its pool's free_list (if the FMR can be mapped again; that is, - * remap_count < pool->max_remaps) or its pool's dirty_list (if the - * FMR needs to be unmapped before being remapped). In either of - * these cases it is a bug if the ref_count is not 0. In other words, - * if ref_count is > 0, then the list member must not be linked into - * either free_list or dirty_list. - * - * The cache_node member is used to link the FMR into a cache bucket - * (if caching is enabled). This is independent of the reference - * count of the FMR. When a valid FMR is released, its ref_count is - * decremented, and if ref_count reaches 0, the FMR is placed in - * either free_list or dirty_list as appropriate. However, it is not - * removed from the cache and may be "revived" if a call to - * ib_fmr_register_physical() occurs before the FMR is remapped. In - * this case we just increment the ref_count and remove the FMR from - * free_list/dirty_list. - * - * Before we remap an FMR from free_list, we remove it from the cache - * (to prevent another user from obtaining a stale FMR). When an FMR - * is released, we add it to the tail of the free list, so that our - * cache eviction policy is "least recently used." - * - * All manipulation of ref_count, list and cache_node is protected by - * pool_lock to maintain consistency. 
- */ - -struct ib_fmr_pool { - spinlock_t pool_lock; - - int pool_size; - int max_pages; - int max_remaps; - int dirty_watermark; - int dirty_len; - struct list_head free_list; - struct list_head dirty_list; - struct hlist_head *cache_bucket; - - void (*flush_function)(struct ib_fmr_pool *pool, - void * arg); - void *flush_arg; - - struct kthread_worker *worker; - struct kthread_work work; - - atomic_t req_ser; - atomic_t flush_ser; - - wait_queue_head_t force_wait; -}; - -static inline u32 ib_fmr_hash(u64 first_page) -{ - return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) & - (IB_FMR_HASH_SIZE - 1); -} - -/* Caller must hold pool_lock */ -static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, - u64 *page_list, - int page_list_len, - u64 io_virtual_address) -{ - struct hlist_head *bucket; - struct ib_pool_fmr *fmr; - - if (!pool->cache_bucket) - return NULL; - - bucket = pool->cache_bucket + ib_fmr_hash(*page_list); - - hlist_for_each_entry(fmr, bucket, cache_node) - if (io_virtual_address == fmr->io_virtual_address && - page_list_len == fmr->page_list_len && - !memcmp(page_list, fmr->page_list, - page_list_len * sizeof *page_list)) - return fmr; - - return NULL; -} - -static void ib_fmr_batch_release(struct ib_fmr_pool *pool) -{ - int ret; - struct ib_pool_fmr *fmr; - LIST_HEAD(unmap_list); - LIST_HEAD(fmr_list); - - spin_lock_irq(&pool->pool_lock); - - list_for_each_entry(fmr, &pool->dirty_list, list) { - hlist_del_init(&fmr->cache_node); - fmr->remap_count = 0; - list_add_tail(&fmr->fmr->list, &fmr_list); - } - - list_splice_init(&pool->dirty_list, &unmap_list); - pool->dirty_len = 0; - - spin_unlock_irq(&pool->pool_lock); - - if (list_empty(&unmap_list)) { - return; - } - - ret = ib_unmap_fmr(&fmr_list); - if (ret) - pr_warn(PFX "ib_unmap_fmr returned %d\n", ret); - - spin_lock_irq(&pool->pool_lock); - list_splice(&unmap_list, &pool->free_list); - spin_unlock_irq(&pool->pool_lock); -} - -static void ib_fmr_cleanup_func(struct kthread_work *work) -{ - struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work); - - ib_fmr_batch_release(pool); - atomic_inc(&pool->flush_ser); - wake_up_interruptible(&pool->force_wait); - - if (pool->flush_function) - pool->flush_function(pool, pool->flush_arg); - - if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) - kthread_queue_work(pool->worker, &pool->work); -} - -/** - * ib_create_fmr_pool - Create an FMR pool - * @pd:Protection domain for FMRs - * @params:FMR pool parameters - * - * Create a pool of FMRs. Return value is pointer to new pool or - * error code if creation failed. 
- */ -struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, - struct ib_fmr_pool_param *params) -{ - struct ib_device *device; - struct ib_fmr_pool *pool; - int i; - int ret; - int max_remaps; - - if (!params) - return ERR_PTR(-EINVAL); - - device = pd->device; - if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr || - !device->ops.map_phys_fmr || !device->ops.unmap_fmr) { - dev_info(&device->dev, "Device does not support FMRs\n"); - return ERR_PTR(-ENOSYS); - } - - if (!device->attrs.max_map_per_fmr) - max_remaps = IB_FMR_MAX_REMAPS; - else - max_remaps = device->attrs.max_map_per_fmr; - - pool = kmalloc(sizeof *pool, GFP_KERNEL); - if (!pool) - return ERR_PTR(-ENOMEM); - - pool->cache_bucket = NULL; - pool->flush_function = params->flush_function; - pool->flush_arg = params->flush_arg; - - INIT_LIST_HEAD(&pool->free_list); - INIT_LIST_HEAD(&pool->dirty_list); - - if (params->cache) { - pool->cache_bucket = - kmalloc_array(IB_FMR_HASH_SIZE, - sizeof(*pool->cache_bucket), - GFP_KERNEL); - if (!pool->cache_bucket) { - ret = -ENOMEM; - goto out_free_pool; - } - - for (i = 0; i < IB_FMR_HASH_SIZE; ++i) - INIT_HLIST_HEAD(pool->cache_bucket + i); - } - - pool->pool_size = 0; - pool->max_pages = params->max_pages_per_fmr; - pool->max_remaps = max_remaps; - pool->dirty_watermark = params->dirty_watermark; - pool->dirty_len = 0; - spin_lock_init(&pool->pool_lock); - atomic_set(&pool->req_ser, 0); - atomic_set(&pool->flush_ser, 0); - init_waitqueue_head(&pool->force_wait); - - pool->worker = - kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev)); - if (IS_ERR(pool->worker)) { - pr_warn(PFX "couldn't start cleanup kthread worker\n"); - ret = PTR_ERR(pool->worker); - goto out_free_pool; - } - kthread_init_work(&pool->work, ib_fmr_cleanup_func); - - { - struct ib_pool_fmr *fmr; - struct ib_fmr_attr fmr_attr = { - .max_pages = params->max_pages_per_fmr, - .max_maps = pool->max_remaps, - .page_shift = params->page_shift - }; - int bytes_per_fmr = sizeof *fmr; - - if (pool->cache_bucket) - bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64); - - for (i = 0; i < params->pool_size; ++i) { - fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); - if (!fmr) - goto out_fail; - - fmr->pool = pool; - fmr->remap_count = 0; - fmr->ref_count = 0; - INIT_HLIST_NODE(&fmr->cache_node); - - fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); - if (IS_ERR(fmr->fmr)) { - pr_warn(PFX "fmr_create failed for FMR %d\n", - i); - kfree(fmr); - goto out_fail; - } - - list_add_tail(&fmr->list, &pool->free_list); - ++pool->pool_size; - } - } - - return pool; - - out_free_pool: - kfree(pool->cache_bucket); - kfree(pool); - - return ERR_PTR(ret); - - out_fail: - ib_destroy_fmr_pool(pool); - - return ERR_PTR(-ENOMEM); -} -EXPORT_SYMBOL(ib_create_fmr_pool); - -/** - * ib_destroy_fmr_pool - Free FMR pool - * @pool:FMR pool to free - * - * Destroy an FMR pool and free all associated resources. 
- */ -void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) -{ - struct ib_pool_fmr *fmr; - struct ib_pool_fmr *tmp; - LIST_HEAD(fmr_list); - int i; - - kthread_destroy_worker(pool->worker); - ib_fmr_batch_release(pool); - - i = 0; - list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { - if (fmr->remap_count) { - INIT_LIST_HEAD(&fmr_list); - list_add_tail(&fmr->fmr->list, &fmr_list); - ib_unmap_fmr(&fmr_list); - } - ib_dealloc_fmr(fmr->fmr); - list_del(&fmr->list); - kfree(fmr); - ++i; - } - - if (i < pool->pool_size) - pr_warn(PFX "pool still has %d regions registered\n", - pool->pool_size - i); - - kfree(pool->cache_bucket); - kfree(pool); -} -EXPORT_SYMBOL(ib_destroy_fmr_pool); - -/** - * ib_flush_fmr_pool - Invalidate all unmapped FMRs - * @pool:FMR pool to flush - * - * Ensure that all unmapped FMRs are fully invalidated. - */ -int ib_flush_fmr_pool(struct ib_fmr_pool *pool) -{ - int serial; - struct ib_pool_fmr *fmr, *next; - - /* - * The free_list holds FMRs that may have been used - * but have not been remapped enough times to be dirty. - * Put them on the dirty list now so that the cleanup - * thread will reap them too. - */ - spin_lock_irq(&pool->pool_lock); - list_for_each_entry_safe(fmr, next, &pool->free_list, list) { - if (fmr->remap_count > 0) - list_move(&fmr->list, &pool->dirty_list); - } - spin_unlock_irq(&pool->pool_lock); - - serial = atomic_inc_return(&pool->req_ser); - kthread_queue_work(pool->worker, &pool->work); - - if (wait_event_interruptible(pool->force_wait, - atomic_read(&pool->flush_ser) - serial >= 0)) - return -EINTR; - - return 0; -} -EXPORT_SYMBOL(ib_flush_fmr_pool); - -/** - * ib_fmr_pool_map_phys - Map an FMR from an FMR pool. - * @pool_handle: FMR pool to allocate FMR from - * @page_list: List of pages to map - * @list_len: Number of pages in @page_list - * @io_virtual_address: I/O virtual address for new FMR - */ -struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, - u64 *page_list, - int list_len, - u64 io_virtual_address) -{ - struct ib_fmr_pool *pool = pool_handle; - struct ib_pool_fmr *fmr; - unsigned long flags; - int result; - - if (list_len < 1 || list_len > pool->max_pages) - return ERR_PTR(-EINVAL); - - spin_lock_irqsave(&pool->pool_lock, flags); - fmr = ib_fmr_cache_lookup(pool, - page_list, - list_len, - io_virtual_address); - if (fmr) { - /* found in cache */ - ++fmr->ref_count; - if (fmr->ref_count == 1) { - list_del(&fmr->list); - } - - spin_unlock_irqrestore(&pool->pool_lock, flags); - - return fmr; - } - - if (list_empty(&pool->free_list)) { - spin_unlock_irqrestore(&pool->pool_lock, flags); - return ERR_PTR(-EAGAIN); - } - - fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list); - list_del(&fmr->list); - hlist_del_init(&fmr->cache_node); - spin_unlock_irqrestore(&pool->pool_lock, flags); - - result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, - io_virtual_address); - - if (result) { - spin_lock_irqsave(&pool->pool_lock, flags); - list_add(&fmr->list, &pool->free_list); - spin_unlock_irqrestore(&pool->pool_lock, flags); - - pr_warn(PFX "fmr_map returns %d\n", result); - - return ERR_PTR(result); - } - - ++fmr->remap_count; - fmr->ref_count = 1; - - if (pool->cache_bucket) { - fmr->io_virtual_address = io_virtual_address; - fmr->page_list_len = list_len; - memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); - - spin_lock_irqsave(&pool->pool_lock, flags); - hlist_add_head(&fmr->cache_node, - pool->cache_bucket + ib_fmr_hash(fmr->page_list[0])); - 
spin_unlock_irqrestore(&pool->pool_lock, flags); - } - - return fmr; -} -EXPORT_SYMBOL(ib_fmr_pool_map_phys); - -/** - * ib_fmr_pool_unmap - Unmap FMR - * @fmr:FMR to unmap - * - * Unmap an FMR. The FMR mapping may remain valid until the FMR is - * reused (or until ib_flush_fmr_pool() is called). - */ -void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) -{ - struct ib_fmr_pool *pool; - unsigned long flags; - - pool = fmr->pool; - - spin_lock_irqsave(&pool->pool_lock, flags); - - --fmr->ref_count; - if (!fmr->ref_count) { - if (fmr->remap_count < pool->max_remaps) { - list_add_tail(&fmr->list, &pool->free_list); - } else { - list_add_tail(&fmr->list, &pool->dirty_list); - if (++pool->dirty_len >= pool->dirty_watermark) { - atomic_inc(&pool->req_ser); - kthread_queue_work(pool->worker, &pool->work); - } - } - } - - spin_unlock_irqrestore(&pool->pool_lock, flags); -} -EXPORT_SYMBOL(ib_fmr_pool_unmap); diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h deleted file mode 100644 index 2fd9bfb6d648..000000000000 --- a/include/rdma/ib_fmr_pool.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#if !defined(IB_FMR_POOL_H) -#define IB_FMR_POOL_H - -#include - -struct ib_fmr_pool; - -/** - * struct ib_fmr_pool_param - Parameters for creating FMR pool - * @max_pages_per_fmr:Maximum number of pages per map request. - * @page_shift: Log2 of sizeof "pages" mapped by this fmr - * @access:Access flags for FMRs in pool. - * @pool_size:Number of FMRs to allocate for pool. - * @dirty_watermark:Flush is triggered when @dirty_watermark dirty - * FMRs are present. - * @flush_function:Callback called when unmapped FMRs are flushed and - * more FMRs are possibly available for mapping - * @flush_arg:Context passed to user's flush function. - * @cache:If set, FMRs may be reused after unmapping for identical map - * requests. 
- */ -struct ib_fmr_pool_param { - int max_pages_per_fmr; - int page_shift; - enum ib_access_flags access; - int pool_size; - int dirty_watermark; - void (*flush_function)(struct ib_fmr_pool *pool, - void *arg); - void *flush_arg; - unsigned cache:1; -}; - -struct ib_pool_fmr { - struct ib_fmr *fmr; - struct ib_fmr_pool *pool; - struct list_head list; - struct hlist_node cache_node; - int ref_count; - int remap_count; - u64 io_virtual_address; - int page_list_len; - u64 page_list[]; -}; - -struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, - struct ib_fmr_pool_param *params); - -void ib_destroy_fmr_pool(struct ib_fmr_pool *pool); - -int ib_flush_fmr_pool(struct ib_fmr_pool *pool); - -struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, - u64 *page_list, - int list_len, - u64 io_virtual_address); - -void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); - -#endif /* IB_FMR_POOL_H */ -- cgit v1.2.3 From 3a578152a9208bbcd196210be2f5396744cda302 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 May 2020 16:45:53 -0300 Subject: RDMA/core: Remove FMR device ops After removing FMR support from all the RDMA ULPs and providers, there is no need to keep FMR operation for IB devices. Link: https://lore.kernel.org/r/11-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Signed-off-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- Documentation/infiniband/core_locking.rst | 2 -- drivers/infiniband/core/device.c | 4 --- drivers/infiniband/core/verbs.c | 48 ------------------------- include/rdma/ib_verbs.h | 59 ------------------------------- 4 files changed, 113 deletions(-) (limited to 'include/rdma') diff --git a/Documentation/infiniband/core_locking.rst b/Documentation/infiniband/core_locking.rst index 8f76a8a5a38f..efd5e7603014 100644 --- a/Documentation/infiniband/core_locking.rst +++ b/Documentation/infiniband/core_locking.rst @@ -22,7 +22,6 @@ Sleeping and interrupt context - post_recv - poll_cq - req_notify_cq - - map_phys_fmr which may not sleep and must be callable from any context. @@ -36,7 +35,6 @@ Sleeping and interrupt context - ib_post_send - ib_post_recv - ib_req_notify_cq - - ib_map_phys_fmr are therefore safe to call from any context. 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 53f541f41ff3..905a2beaf885 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2571,7 +2571,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, add_gid); SET_DEVICE_OP(dev_ops, advise_mr); SET_DEVICE_OP(dev_ops, alloc_dm); - SET_DEVICE_OP(dev_ops, alloc_fmr); SET_DEVICE_OP(dev_ops, alloc_hw_stats); SET_DEVICE_OP(dev_ops, alloc_mr); SET_DEVICE_OP(dev_ops, alloc_mr_integrity); @@ -2598,7 +2597,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_wq); SET_DEVICE_OP(dev_ops, dealloc_dm); SET_DEVICE_OP(dev_ops, dealloc_driver); - SET_DEVICE_OP(dev_ops, dealloc_fmr); SET_DEVICE_OP(dev_ops, dealloc_mw); SET_DEVICE_OP(dev_ops, dealloc_pd); SET_DEVICE_OP(dev_ops, dealloc_ucontext); @@ -2642,7 +2640,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); SET_DEVICE_OP(dev_ops, map_mr_sg_pi); - SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); SET_DEVICE_OP(dev_ops, mmap_free); SET_DEVICE_OP(dev_ops, modify_ah); @@ -2676,7 +2673,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, resize_cq); SET_DEVICE_OP(dev_ops, set_vf_guid); SET_DEVICE_OP(dev_ops, set_vf_link_state); - SET_DEVICE_OP(dev_ops, unmap_fmr); SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_cq); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 21815e125e98..53d6505c0c7b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2212,54 +2212,6 @@ out: } EXPORT_SYMBOL(ib_alloc_mr_integrity); -/* "Fast" memory regions */ - -struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ib_fmr *fmr; - - if (!pd->device->ops.alloc_fmr) - return ERR_PTR(-EOPNOTSUPP); - - fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr); - if (!IS_ERR(fmr)) { - fmr->device = pd->device; - fmr->pd = pd; - atomic_inc(&pd->usecnt); - } - - return fmr; -} -EXPORT_SYMBOL(ib_alloc_fmr); - -int ib_unmap_fmr(struct list_head *fmr_list) -{ - struct ib_fmr *fmr; - - if (list_empty(fmr_list)) - return 0; - - fmr = list_entry(fmr_list->next, struct ib_fmr, list); - return fmr->device->ops.unmap_fmr(fmr_list); -} -EXPORT_SYMBOL(ib_unmap_fmr); - -int ib_dealloc_fmr(struct ib_fmr *fmr) -{ - struct ib_pd *pd; - int ret; - - pd = fmr->pd; - ret = fmr->device->ops.dealloc_fmr(fmr); - if (!ret) - atomic_dec(&pd->usecnt); - - return ret; -} -EXPORT_SYMBOL(ib_dealloc_fmr); - /* Multicast groups */ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 19864da78649..ff6a8053ec52 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1475,12 +1475,6 @@ enum ib_mr_rereg_flags { IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1) }; -struct ib_fmr_attr { - int max_pages; - int max_maps; - u8 page_shift; -}; - struct ib_umem; enum rdma_remove_reason { @@ -1855,14 +1849,6 @@ struct ib_mw { enum ib_mw_type type; }; -struct ib_fmr { - struct ib_device *device; - struct ib_pd *pd; - struct list_head list; - u32 lkey; - u32 rkey; -}; - /* Supported steering options */ enum ib_flow_attr_type { /* steering according to rule specifications */ @@ -2505,12 +2491,6 @@ 
struct ib_device_ops { struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); - struct ib_fmr *(*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, - u64 iova); - int (*unmap_fmr)(struct list_head *fmr_list); - int (*dealloc_fmr)(struct ib_fmr *fmr); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, @@ -4319,45 +4299,6 @@ static inline u32 ib_inc_rkey(u32 rkey) return ((rkey + 1) & mask) | (rkey & ~mask); } -/** - * ib_alloc_fmr - Allocates a unmapped fast memory region. - * @pd: The protection domain associated with the unmapped region. - * @mr_access_flags: Specifies the memory access rights. - * @fmr_attr: Attributes of the unmapped region. - * - * A fast memory region must be mapped before it can be used as part of - * a work request. - */ -struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -/** - * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. - * @fmr: The fast memory region to associate with the pages. - * @page_list: An array of physical pages to map to the fast memory region. - * @list_len: The number of pages in page_list. - * @iova: The I/O virtual address to use with the mapped region. - */ -static inline int ib_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, int list_len, - u64 iova) -{ - return fmr->device->ops.map_phys_fmr(fmr, page_list, list_len, iova); -} - -/** - * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. - * @fmr_list: A linked list of fast memory regions to unmap. - */ -int ib_unmap_fmr(struct list_head *fmr_list); - -/** - * ib_dealloc_fmr - Deallocates a fast memory region. - * @fmr: The fast memory region to deallocate. - */ -int ib_dealloc_fmr(struct ib_fmr *fmr); - /** * ib_attach_mcast - Attaches the specified QP to a multicast group. * @qp: QP to attach to the multicast group. The QP must be type -- cgit v1.2.3 From 649392bf75a423287a9c4936b341677f12e8cf0b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 28 May 2020 16:45:54 -0300 Subject: RDMA: Remove 'max_fmr' Now that FMR support is gone, this attribute can be deleted from all places. 
Link: https://lore.kernel.org/r/12-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma.h | 1 - drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 - drivers/infiniband/hw/qedr/main.c | 1 - drivers/infiniband/hw/qedr/qedr.h | 1 - drivers/infiniband/hw/qedr/verbs.c | 1 - drivers/infiniband/sw/rdmavt/mr.c | 1 - drivers/infiniband/sw/siw/siw.h | 2 -- drivers/infiniband/sw/siw/siw_main.c | 1 - drivers/infiniband/sw/siw/siw_verbs.c | 1 - drivers/net/ethernet/qlogic/qed/qed_rdma.c | 1 - drivers/net/ethernet/qlogic/qed/qed_rdma.h | 1 - include/linux/qed/qed_rdma_if.h | 1 - include/rdma/ib_verbs.h | 1 - net/rds/ib.c | 2 +- 16 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2067a939788b..56d207405dbd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -356,7 +356,6 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; resp->max_ah = attr->max_ah; - resp->max_fmr = attr->max_fmr; resp->max_map_per_fmr = attr->max_map_per_fmr; resp->max_srq = attr->max_srq; resp->max_srq_wr = attr->max_srq_wr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 7baedc74e39d..fcfe0e82197a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -98,7 +98,6 @@ struct ocrdma_dev_attr { u64 max_mr_size; u32 max_num_mr_pbl; int max_mw; - int max_fmr; int max_map_per_fmr; int max_pages_per_frmr; u16 max_ord_per_qp; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index d82d3ec3649e..e07bf0b2209a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1190,7 +1190,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_mr = rsp->max_mr; attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) | rsp->max_mr_size_lo; - attr->max_fmr = 0; attr->max_pages_per_frmr = rsp->max_pages_per_frmr; attr->max_num_mr_pbl = rsp->max_num_mr_pbl; attr->max_cqe = rsp->max_cq_cqes_per_cq & diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 10e343894595..890e3fd41d21 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -99,7 +99,6 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; - attr->max_fmr = 0; attr->max_map_per_fmr = 0; attr->max_qp_rd_atom = min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index dcdc85a1ab25..ccaedfd53e49 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -632,7 +632,6 @@ static int qedr_set_device_attr(struct qedr_dev *dev) attr->max_mr_size = qed_attr->max_mr_size; attr->max_cqe = min_t(u64, qed_attr->max_cqe, QEDR_MAX_CQES); attr->max_mw = qed_attr->max_mw; - attr->max_fmr = qed_attr->max_fmr; attr->max_mr_mw_fmr_pbl = qed_attr->max_mr_mw_fmr_pbl; attr->max_mr_mw_fmr_size = 
qed_attr->max_mr_mw_fmr_size; attr->max_pd = qed_attr->max_pd; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 5488dbd59d3c..fdf90ecb2699 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -103,7 +103,6 @@ struct qedr_device_attr { u64 max_mr_size; u32 max_cqe; u32 max_mw; - u32 max_fmr; u32 max_mr_mw_fmr_pbl; u64 max_mr_mw_fmr_size; u32 max_pd; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d6b94a713573..ca88006eaa66 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -145,7 +145,6 @@ int qedr_query_device(struct ib_device *ibdev, attr->max_mw = qattr->max_mw; attr->max_pd = qattr->max_pd; attr->atomic_cap = dev->atomic_cap; - attr->max_fmr = qattr->max_fmr; attr->max_map_per_fmr = 16; attr->max_qp_init_rd_atom = 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index ddb0c0d771c2..60864e5ca7cb 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -97,7 +97,6 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; return 0; } diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 5a58a1cc7a7e..e9753831ac3f 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -30,7 +30,6 @@ #define SIW_MAX_MR (SIW_MAX_QP * 10) #define SIW_MAX_PD SIW_MAX_QP #define SIW_MAX_MW 0 /* to be set if MW's are supported */ -#define SIW_MAX_FMR SIW_MAX_MR #define SIW_MAX_SRQ SIW_MAX_QP #define SIW_MAX_SRQ_WR (SIW_MAX_QP_WR * 10) #define SIW_MAX_CONTEXT SIW_MAX_PD @@ -59,7 +58,6 @@ struct siw_dev_cap { int max_mr; int max_pd; int max_mw; - int max_fmr; int max_srq; int max_srq_wr; int max_srq_sge; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 5cd40fb9e20c..a0b8cc643c5c 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -413,7 +413,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->attrs.max_mr = SIW_MAX_MR; sdev->attrs.max_pd = SIW_MAX_PD; sdev->attrs.max_mw = SIW_MAX_MW; - sdev->attrs.max_fmr = SIW_MAX_FMR; sdev->attrs.max_srq = SIW_MAX_SRQ; sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; sdev->attrs.max_srq_sge = SIW_MAX_SGE; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index aeb842bc7a1e..987e2ba05dbc 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -136,7 +136,6 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, attr->max_cq = sdev->attrs.max_cq; attr->max_cqe = sdev->attrs.max_cqe; attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; - attr->max_fmr = sdev->attrs.max_fmr; attr->max_mr = sdev->attrs.max_mr; attr->max_mw = sdev->attrs.max_mw; attr->max_mr_size = ~0ull; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 38b1f402f7ed..5dc18a4bdda4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -499,7 +499,6 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, dev->max_cqe = QED_RDMA_MAX_CQE_16_BIT; dev->max_mw = 0; - dev->max_fmr = QED_RDMA_MAX_FMR; dev->max_mr_mw_fmr_pbl = 
(PAGE_SIZE / 8) * (PAGE_SIZE / 8); dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE; dev->max_pkey = QED_RDMA_MAX_P_KEY; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h index 3689fe3e5935..dfaa2f552627 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h @@ -45,7 +45,6 @@ #include "qed_iwarp.h" #include "qed_roce.h" -#define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) #define QED_RDMA_MAX_P_KEY (1) #define QED_RDMA_MAX_WQE (0x7FFF) #define QED_RDMA_MAX_SRQ_WQE_ELEM (0x7FFF) diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 74efca15fde7..c90276cda5c1 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -91,7 +91,6 @@ struct qed_rdma_device { u64 max_mr_size; u32 max_cqe; u32 max_mw; - u32 max_fmr; u32 max_mr_mw_fmr_pbl; u64 max_mr_mw_fmr_size; u32 max_pd; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ff6a8053ec52..c4708b3243f9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -430,7 +430,6 @@ struct ib_device_attr { int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; - int max_fmr; int max_map_per_fmr; int max_srq; int max_srq_wr; diff --git a/net/rds/ib.c b/net/rds/ib.c index 6c43b3e4c736..deecbdcdae84 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -217,7 +217,7 @@ static int rds_ib_add_one(struct ib_device *device) } rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n", - device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, + device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge, rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs); pr_info("RDS/IB: %s: added\n", device->name); -- cgit v1.2.3 From 4d12c04caa88cd3115f25acd832a7cddb698981b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 28 May 2020 16:45:55 -0300 Subject: RDMA: Remove 'max_map_per_fmr' Now that FMR support is gone, this attribute can be deleted from all places. 
Link: https://lore.kernel.org/r/13-v3-f58e6669d5d3+2cf-fmr_removal_jgg@mellanox.com Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/hw/hfi1/verbs.c | 1 - drivers/infiniband/hw/i40iw/i40iw_verbs.c | 1 - drivers/infiniband/hw/mlx5/main.c | 1 - drivers/infiniband/hw/mthca/mthca_provider.c | 10 ---------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 1 - drivers/infiniband/hw/qedr/verbs.c | 1 - drivers/infiniband/hw/qib/qib_verbs.c | 1 - drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 1 - include/rdma/ib_verbs.h | 1 - 10 files changed, 19 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 56d207405dbd..b48b3f6e632d 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -356,7 +356,6 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; resp->max_ah = attr->max_ah; - resp->max_map_per_fmr = attr->max_map_per_fmr; resp->max_srq = attr->max_srq; resp->max_srq_wr = attr->max_srq_wr; resp->max_srq_sge = attr->max_srq_sge; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 43ddced15951..30865635b449 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1361,7 +1361,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; rdi->dparms.props.max_cqe = hfi1_max_cqes; - rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_pd = hfi1_max_pds; rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 1b6fb1380961..19af29a48c55 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -83,7 +83,6 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE; props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->atomic_cap = IB_ATOMIC_NONE; - props->max_map_per_fmr = 1; props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR; return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 49a1aff72715..343a8b8361e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -999,7 +999,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ props->max_ah = INT_MAX; props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index de2124a8ee2b..9fa2f9164a47 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -118,16 +118,6 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - /* - * If Sinai memory key 
optimization is being used, then only - * the 8-bit key portion will change. For other HCAs, the - * unused index bits will also be used for FMR remapping. - */ - if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) - props->max_map_per_fmr = 255; - else - props->max_map_per_fmr = - (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1; err = 0; out: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 890e3fd41d21..d11c74390a12 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -99,7 +99,6 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; - attr->max_map_per_fmr = 0; attr->max_qp_rd_atom = min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index ca88006eaa66..9b9e80266367 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -145,7 +145,6 @@ int qedr_query_device(struct ib_device *ibdev, attr->max_mw = qattr->max_mw; attr->max_pd = qattr->max_pd; attr->atomic_cap = dev->atomic_cap; - attr->max_map_per_fmr = 16; attr->max_qp_init_rd_atom = 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); attr->max_qp_rd_atom = diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 7508abb6a0fa..7acf9ba5358a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1460,7 +1460,6 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; rdi->dparms.props.max_ah = ib_qib_max_ahs; - rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; rdi->dparms.props.max_srq = ib_qib_max_srqs; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 71f82339446c..b8a77ce11590 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -322,7 +322,6 @@ int usnic_ib_query_device(struct ib_device *ibdev, props->max_mcast_grp = 0; props->max_mcast_qp_attach = 0; props->max_total_mcast_qp_attach = 0; - props->max_map_per_fmr = 0; /* Owned by Userspace * max_qp_wr, max_sge, max_sge_rd, max_cqe */ mutex_unlock(&us_ibdev->usdev_lock); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c4708b3243f9..033e7044f29c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -430,7 +430,6 @@ struct ib_device_attr { int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; - int max_map_per_fmr; int max_srq; int max_srq_wr; int max_srq_sge; -- cgit v1.2.3
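[Note: with ib_alloc_fmr()/ib_map_phys_fmr() and the max_fmr/max_map_per_fmr attributes gone, the registration path left for kernel ULPs is fast registration work requests (FRWR). The sketch below is not taken from these patches: the helper name example_frwr_register() is invented, and it assumes the caller already owns a PD, a connected QP and a DMA-mapped scatterlist. It sizes the MR from the attributes that remain in struct ib_device_attr (max_fast_reg_page_list_len rather than the removed FMR limits), in the same spirit as the rds/ib.c hunk above that now prints attrs.max_mr instead of attrs.max_fmr.]

#include <rdma/ib_verbs.h>

/*
 * Hypothetical helper: register one DMA-mapped scatterlist with a fast
 * registration work request.  Error unwinding is trimmed for brevity.
 */
static int example_frwr_register(struct ib_pd *pd, struct ib_qp *qp,
				 struct scatterlist *sg, int sg_nents,
				 struct ib_mr **out_mr)
{
	struct ib_device_attr *attr = &pd->device->attrs;
	struct ib_reg_wr reg_wr = {};
	struct ib_mr *mr;
	int n, ret;

	/* Size the MR from what the device reports, not the old FMR caps. */
	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
			 min_t(u32, sg_nents,
			       attr->max_fast_reg_page_list_len));
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
	if (n != sg_nents) {
		ret = n < 0 ? n : -EINVAL;
		goto err;
	}

	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
			IB_ACCESS_REMOTE_WRITE;

	/* Like the old ib_map_phys_fmr(), posting the WR may not sleep. */
	ret = ib_post_send(qp, &reg_wr.wr, NULL);
	if (ret)
		goto err;

	*out_mr = mr;
	return 0;

err:
	ib_dereg_mr(mr);
	return ret;
}

[The unmap side, which ib_fmr_pool_unmap() used to defer to a pool flush, becomes posting an IB_WR_LOCAL_INV work request carrying the same rkey once the data transfer has completed.]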