From 2dee0e545894c23b1a2cc2019ac87dffb42e5984 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:07 +0300 Subject: IB/uverbs: Enable QP creation with a given source QP number Enable QP creation with a given source QP number, the created QP will use the source QPN as its wire QP number. To create such a QP, root privileges (i.e. CAP_NET_RAW) are required from the user application. This comes as a pre-patch for downstream patches in this series to allow user space applications to accelerate traffic which is typically handled by IPoIB ULP. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/ib_user_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 270c350bedc6..63656d2e8705 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -578,7 +578,7 @@ struct ib_uverbs_ex_create_qp { __u32 comp_mask; __u32 create_flags; __u32 rwq_ind_tbl_handle; - __u32 reserved1; + __u32 source_qpn; }; struct ib_uverbs_open_qp { -- cgit v1.2.3 From ea30b966f7dd6bcfb20c98a7f99608c7bb10bfac Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 21 Jun 2017 09:26:28 +0300 Subject: IB/mlx4: Add inline-receive support When inline-receive is enabled, the HCA may write received data into the receive WQE. Inline-receive is enabled by setting its matching bit in the QP context and each single-packet message with payload not exceeding the receive WQE size will be delivered to the WQE. The completion report will indicate that the payload was placed to the WQE. It includes: 1) Return maximum supported size of inline-receive by the hardware in query_device vendor's data part. 2) Enable the feature when requested by the vendor data input. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 7 +++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 +++ drivers/infiniband/hw/mlx4/qp.c | 32 +++++++++++++++++++++++++------- include/uapi/rdma/mlx4-abi.h | 3 ++- 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d1b43cbbfea7..e8c290edb1e1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -563,6 +563,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, } } + if (uhw->outlen >= resp.response_length + + sizeof(resp.max_inl_recv_sz)) { + resp.response_length += sizeof(resp.max_inl_recv_sz); + resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg * + sizeof(struct mlx4_wqe_data_seg); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 9db82e67e959..a6337f3161cf 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -318,6 +318,7 @@ struct mlx4_ib_qp { u8 sq_no_prefetch; u8 state; int mlx_type; + u32 inl_recv_sz; struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; @@ -623,6 +624,8 @@ struct mlx4_uverbs_ex_query_device_resp { __u32 comp_mask; __u32 response_length; __u64 hca_core_clock_offset; + __u32 max_inl_recv_sz; + __u32 reserved; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 75c0e6c5dd56..d1caa39a3943 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -377,7 +377,8 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_rq, struct mlx4_ib_qp *qp) + int is_user, int has_rq, struct mlx4_ib_qp *qp, + u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || @@ -385,18 +386,24 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, return -EINVAL; if (!has_rq) { - if (cap->max_recv_wr) + if (cap->max_recv_wr || inl_recv_sz) return -EINVAL; qp->rq.wqe_cnt = qp->rq.max_gs = 0; } else { + u32 max_inl_recv_sz = dev->dev->caps.max_rq_sg * + sizeof(struct mlx4_wqe_data_seg); + u32 wqe_size; + /* HW requires >= 1 RQ entry with >= 1 gather entry */ - if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge || + inl_recv_sz > max_inl_recv_sz)) return -EINVAL; qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); - qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); + wqe_size = qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg); + qp->rq.wqe_shift = ilog2(max_t(u32, wqe_size, inl_recv_sz)); } /* leave userspace return values as they were, so as not to break ABI */ @@ -719,9 +726,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); - if (err) - goto err; if (pd->uobject) { struct mlx4_ib_create_qp ucmd; @@ -731,6 +735,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, ucmd.inl_recv_sz); + if (err) + goto err; + + qp->inl_recv_sz = ucmd.inl_recv_sz; qp->sq_no_prefetch = ucmd.sq_no_prefetch; err = set_user_sq_size(dev, qp, &ucmd); @@ -760,6 +770,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; } } else { + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, 0); + if (err) + goto err; + qp->sq_no_prefetch = 0; if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) @@ -1651,6 +1666,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } + if (qp->inl_recv_sz) + context->param3 |= cpu_to_be32(1 << 25); + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; else if (ibqp->qp_type == IB_QPT_RAW_PACKET) diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index af431752655c..bf3bdba2f326 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -101,7 +101,8 @@ struct mlx4_ib_create_qp { __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; - __u8 reserved[5]; + __u32 inl_recv_sz; + __u8 reserved; }; #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From 400b1ebcfe31279895f1baa8ecaa390d9a4a0eef Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:24 +0300 Subject: IB/mlx4: Add support for WQ related verbs Support create/modify/destroy WQ related verbs. The base IB object to enable RSS functionality is a WQ (i.e. ib_wq). This patch implements the related WQ verbs as of create, modify and destroy. In downstream patches the WQ will be used as part of an indirection table (i.e. ib_rwq_ind_table) to enable RSS QP creation. Notes: ConnectX-3 hardware requires consecutive WQNs list as receive descriptor queues for the RSS QP. Hence, the driver manages consecutive ranges lists per context which the user must respect. Destroying the WQ does not return its WQN back to its range for reusing. However, destroying all WQs from the same range releases the range and in turn releases its WQNs for reusing. Since the WQ object is not a natural object in the hardware, the driver implements the WQ by the hardware QP. As such, the WQ inherits its port from its RSS QP parent upon its RST->INIT transition and by that time its state is applied to the hardware. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 24 ++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 25 +- drivers/infiniband/hw/mlx4/qp.c | 505 +++++++++++++++++++++++++++++++---- include/uapi/rdma/mlx4-abi.h | 14 + 4 files changed, 511 insertions(+), 57 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0944e224c0df..7c6f929ebd3e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -81,6 +81,8 @@ static const char mlx4_ib_version[] = DRV_VERSION "\n"; static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); +static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device, + u8 port_num); static struct workqueue_struct *wq; @@ -552,6 +554,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->timestamp_mask = 0xFFFFFFFFFFFFULL; props->max_ah = INT_MAX; + if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && + (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET || + mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) + props->max_wq_type_rq = props->max_qp; + if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); @@ -1076,6 +1083,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); + INIT_LIST_HEAD(&context->wqn_ranges_list); + mutex_init(&context->wqn_ranges_mutex); + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); else @@ -2720,6 +2730,20 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; + if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && + ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == + IB_LINK_LAYER_ETHERNET) || + (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == + IB_LINK_LAYER_ETHERNET))) { + ibdev->ib_dev.create_wq = mlx4_ib_create_wq; + ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; + ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); + } + if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index a6337f3161cf..ba4e78c064d2 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -88,6 +88,8 @@ struct mlx4_ib_ucontext { struct list_head db_page_list; struct mutex db_page_mutex; struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; + struct list_head wqn_ranges_list; + struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */ }; struct mlx4_ib_pd { @@ -289,8 +291,19 @@ struct mlx4_roce_smac_vlan_info { int update_vid; }; +struct mlx4_wqn_range { + int base_wqn; + int size; + int refcount; + bool dirty; + struct list_head list; +}; + struct mlx4_ib_qp { - struct ib_qp ibqp; + union { + struct ib_qp ibqp; + struct ib_wq ibwq; + }; struct mlx4_qp mqp; struct mlx4_buf buf; @@ -329,6 +342,9 @@ struct mlx4_ib_qp { struct list_head cq_recv_list; struct list_head cq_send_list; struct counter_index *counter_index; + struct mlx4_wqn_range *wqn_range; + /* Number of RSS QP parents that uses this WQ */ + u32 rss_usecnt; }; struct mlx4_ib_srq { @@ -893,4 +909,11 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); +struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_wq(struct ib_wq *wq); +int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 247b9132e9de..65e4ec549368 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -116,6 +116,11 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), }; +enum mlx4_ib_source_type { + MLX4_IB_QP_SRC = 0, + MLX4_IB_RWQ_SRC = 1, +}; + static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) { return container_of(mqp, struct mlx4_ib_sqp, qp); @@ -330,6 +335,12 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) } } +static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type) +{ + pr_warn_ratelimited("Unexpected event type %d on WQ 0x%06x. Events are not supported for WQs\n", + type, qp->qpn); +} + static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) { /* @@ -639,7 +650,91 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, qp->counter_index = NULL; } +/* + * This function allocates a WQN from a range which is consecutive and aligned + * to its size. In case the range is full, then it creates a new range and + * allocates WQN from it. The new range will be used for following allocations. + */ +static int mlx4_ib_alloc_wqn(struct mlx4_ib_ucontext *context, + struct mlx4_ib_qp *qp, int range_size, int *wqn) +{ + struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device); + struct mlx4_wqn_range *range; + int err = 0; + + mutex_lock(&context->wqn_ranges_mutex); + + range = list_first_entry_or_null(&context->wqn_ranges_list, + struct mlx4_wqn_range, list); + + if (!range || (range->refcount == range->size) || range->dirty) { + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (!range) { + err = -ENOMEM; + goto out; + } + + err = mlx4_qp_reserve_range(dev->dev, range_size, + range_size, &range->base_wqn, 0, + qp->mqp.usage); + if (err) { + kfree(range); + goto out; + } + + range->size = range_size; + list_add(&range->list, &context->wqn_ranges_list); + } else if (range_size != 1) { + /* + * Requesting a new range (>1) when last range is still open, is + * not valid. + */ + err = -EINVAL; + goto out; + } + + qp->wqn_range = range; + + *wqn = range->base_wqn + range->refcount; + + range->refcount++; + +out: + mutex_unlock(&context->wqn_ranges_mutex); + + return err; +} + +static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context, + struct mlx4_ib_qp *qp, bool dirty_release) +{ + struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device); + struct mlx4_wqn_range *range; + + mutex_lock(&context->wqn_ranges_mutex); + + range = qp->wqn_range; + + range->refcount--; + if (!range->refcount) { + mlx4_qp_release_range(dev->dev, range->base_wqn, + range->size); + list_del(&range->list); + kfree(range); + } else if (dirty_release) { + /* + * A range which one of its WQNs is destroyed, won't be able to be + * reused for further WQN allocations. + * The next created WQ will allocate a new range. + */ + range->dirty = 1; + } + + mutex_unlock(&context->wqn_ranges_mutex); +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, + enum mlx4_ib_source_type src, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) @@ -652,6 +747,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; unsigned long flags; + int range_size = 0; /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { @@ -728,27 +824,69 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (pd->uobject) { - struct mlx4_ib_create_qp ucmd; + union { + struct mlx4_ib_create_qp qp; + struct mlx4_ib_create_wq wq; + } ucmd; + size_t copy_len; + + copy_len = (src == MLX4_IB_QP_SRC) ? + sizeof(struct mlx4_ib_create_qp) : + min(sizeof(struct mlx4_ib_create_wq), udata->inlen); - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + if (ib_copy_from_udata(&ucmd, udata, copy_len)) { err = -EFAULT; goto err; } - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, - qp_has_rq(init_attr), qp, ucmd.inl_recv_sz); - if (err) - goto err; + if (src == MLX4_IB_RWQ_SRC) { + if (ucmd.wq.comp_mask || ucmd.wq.reserved1 || + ucmd.wq.reserved[0] || ucmd.wq.reserved[1] || + ucmd.wq.reserved[2]) { + pr_debug("user command isn't supported\n"); + err = -EOPNOTSUPP; + goto err; + } - qp->inl_recv_sz = ucmd.inl_recv_sz; - qp->sq_no_prefetch = ucmd.sq_no_prefetch; + if (ucmd.wq.log_range_size > + ilog2(dev->dev->caps.max_rss_tbl_sz)) { + pr_debug("WQN range size must be equal or smaller than %d\n", + dev->dev->caps.max_rss_tbl_sz); + err = -EOPNOTSUPP; + goto err; + } + range_size = 1 << ucmd.wq.log_range_size; + } else { + qp->inl_recv_sz = ucmd.qp.inl_recv_sz; + } - err = set_user_sq_size(dev, qp, &ucmd); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) goto err; - qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0, 0); + if (src == MLX4_IB_QP_SRC) { + qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch; + + err = set_user_sq_size(dev, qp, + (struct mlx4_ib_create_qp *) + &ucmd); + if (err) + goto err; + } else { + qp->sq_no_prefetch = 1; + qp->sq.wqe_cnt = 1; + qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; + /* Allocated buffer expects to have at least that SQ + * size. + */ + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + } + + qp->umem = ib_umem_get(pd->uobject->context, + (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr : + ucmd.wq.buf_addr, qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -765,7 +903,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), - ucmd.db_addr, &qp->db); + (src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr : + ucmd.wq.db_addr, &qp->db); if (err) goto err_mtt; } @@ -853,6 +992,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_wrid; } } + } else if (src == MLX4_IB_RWQ_SRC) { + err = mlx4_ib_alloc_wqn(to_mucontext(pd->uobject->context), qp, + range_size, &qpn); + if (err) + goto err_wrid; } else { /* Raw packet QPNs may not have bits 6,7 set in their qp_num; * otherwise, the WQE BlueFlame setup flow wrongly causes @@ -891,7 +1035,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, */ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); - qp->mqp.event = mlx4_ib_qp_event; + qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event : + mlx4_ib_wq_event; + if (!*caller_qp) *caller_qp = qp; @@ -918,6 +1064,9 @@ err_qpn: if (!sqpn) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qpn, 1); + else if (src == MLX4_IB_RWQ_SRC) + mlx4_ib_release_wqn(to_mucontext(pd->uobject->context), + qp, 0); else mlx4_qp_release_range(dev->dev, qpn, 1); } @@ -1016,7 +1165,7 @@ static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) return to_mpd(qp->ibqp.pd); } -static void get_cqs(struct mlx4_ib_qp *qp, +static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) { switch (qp->ibqp.qp_type) { @@ -1029,14 +1178,16 @@ static void get_cqs(struct mlx4_ib_qp *qp, *recv_cq = *send_cq; break; default: - *send_cq = to_mcq(qp->ibqp.send_cq); - *recv_cq = to_mcq(qp->ibqp.recv_cq); + *recv_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.recv_cq) : + to_mcq(qp->ibwq.cq); + *send_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.send_cq) : + *recv_cq; break; } } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - int is_user) + enum mlx4_ib_source_type src, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1069,7 +1220,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, } } - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp, src, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(send_cq, recv_cq); @@ -1095,6 +1246,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); + else if (src == MLX4_IB_RWQ_SRC) + mlx4_ib_release_wqn(to_mucontext( + qp->ibwq.uobject->context), qp, 1); else mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); } @@ -1102,9 +1256,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { - if (qp->rq.wqe_cnt) - mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), - &qp->db); + if (qp->rq.wqe_cnt) { + struct mlx4_ib_ucontext *mcontext = !src ? + to_mucontext(qp->ibqp.uobject->context) : + to_mucontext(qp->ibwq.uobject->context); + mlx4_ib_db_unmap_user(mcontext, &qp->db); + } ib_umem_release(qp->umem); } else { kvfree(qp->sq.wrid); @@ -1200,8 +1357,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, /* fall through */ case IB_QPT_UD: { - err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); + err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, + init_attr, udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); @@ -1231,8 +1388,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } - err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - sqpn, &qp); + err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, + init_attr, udata, sqpn, &qp); if (err) return ERR_PTR(err); @@ -1303,7 +1460,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) mlx4_ib_free_qp_counter(dev, mqp); pd = get_pd(mqp); - destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -1626,12 +1783,15 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) } } -static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, +static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { - struct mlx4_ib_dev *dev = to_mdev(ibqp->device); - struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct ib_uobject *ibuobject; + struct ib_srq *ibsrq; + enum ib_qp_type qp_type; + struct mlx4_ib_dev *dev; + struct mlx4_ib_qp *qp; struct mlx4_ib_pd *pd; struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; @@ -1641,6 +1801,28 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int err = -EINVAL; int counter_index; + if (src_type == MLX4_IB_RWQ_SRC) { + struct ib_wq *ibwq; + + ibwq = (struct ib_wq *)src; + ibuobject = ibwq->uobject; + ibsrq = NULL; + qp_type = IB_QPT_RAW_PACKET; + qp = to_mqp((struct ib_qp *)ibwq); + dev = to_mdev(ibwq->device); + pd = to_mpd(ibwq->pd); + } else { + struct ib_qp *ibqp; + + ibqp = (struct ib_qp *)src; + ibuobject = ibqp->uobject; + ibsrq = ibqp->srq; + qp_type = ibqp->qp_type; + qp = to_mqp(ibqp); + dev = to_mdev(ibqp->device); + pd = get_pd(qp); + } + /* APM is not supported under RoCE */ if (attr_mask & IB_QP_ALT_PATH && rdma_port_get_link_layer(&dev->ib_dev, qp->port) == @@ -1674,11 +1856,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->inl_recv_sz) context->param3 |= cpu_to_be32(1 << 25); - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; - else if (ibqp->qp_type == IB_QPT_RAW_PACKET) + else if (qp_type == IB_QPT_RAW_PACKET) context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX; - else if (ibqp->qp_type == IB_QPT_UD) { + else if (qp_type == IB_QPT_UD) { if (qp->flags & MLX4_IB_QP_LSO) context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); @@ -1708,14 +1890,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); - if (ibqp->qp_type == IB_QPT_RAW_PACKET) + if (qp_type == IB_QPT_RAW_PACKET) context->param3 |= cpu_to_be32(1 << 30); } - if (qp->ibqp.uobject) + if (ibuobject) context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, - to_mucontext(ibqp->uobject->context)->uar.index)); + to_mucontext(ibuobject->context) + ->uar.index)); else context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); @@ -1759,7 +1942,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, steer_qp = 1; } - if (ibqp->qp_type == IB_QPT_GSI) { + if (qp_type == IB_QPT_GSI) { enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ? IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE; u8 qpc_roce_mode = gid_type_to_qpc(gid_type); @@ -1776,7 +1959,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { - u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : + u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; @@ -1791,7 +1974,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int index = rdma_ah_read_grh(&attr->ah_attr)->sgid_index; - status = ib_get_cached_gid(ibqp->device, port_num, + status = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &gid_attr); if (!status && !memcmp(&gid, &zgid, sizeof(gid))) status = -ENOENT; @@ -1848,15 +2031,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - pd = get_pd(qp); - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp, src_type, &send_cq, &recv_cq); context->pd = cpu_to_be32(pd->pdn); context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ - if (!qp->ibqp.uobject) + if (!ibuobject) context->params1 |= cpu_to_be32(1 << 11); if (attr_mask & IB_QP_RNR_RETRY) { @@ -1891,7 +2073,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; } - if (ibqp->srq) + if (ibsrq) context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); if (attr_mask & IB_QP_MIN_RNR_TIMER) { @@ -1922,17 +2104,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_Q_KEY; } - if (ibqp->srq) - context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); + if (ibsrq) + context->srqn = cpu_to_be32(1 << 24 | + to_msrq(ibsrq)->msrq.srqn); - if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (qp->rq.wqe_cnt && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && - (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || - ibqp->qp_type == IB_QPT_UD || - ibqp->qp_type == IB_QPT_RAW_PACKET)) { + (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI || + qp_type == IB_QPT_UD || qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || qp->mlx4_ib_qp_type & @@ -1965,7 +2149,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp_type == IB_QPT_RAW_PACKET) { context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | MLX4_IB_LINK_TYPE_ETH; if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { @@ -1975,7 +2159,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { + if (qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { int is_eth = rdma_port_get_link_layer( &dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET; @@ -1985,14 +2169,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; - if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (!ibuobject && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) context->rlkey_roce_mode |= (1 << 4); /* @@ -2001,7 +2186,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * headroom is stamped so that the hardware doesn't start * processing stale work requests. */ - if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + if (!ibuobject && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) { struct mlx4_wqe_ctrl_seg *ctrl; int i; @@ -2058,9 +2245,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET) { - if (!ibqp->uobject) { + if (!ibuobject) { mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, - ibqp->srq ? to_msrq(ibqp->srq) : NULL); + ibsrq ? to_msrq(ibsrq) : NULL); if (send_cq != recv_cq) mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); @@ -2265,7 +2452,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask, + cur_state, new_state); if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; @@ -3550,3 +3738,208 @@ out: return err; } +struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev; + struct ib_qp_init_attr ib_qp_init_attr; + struct mlx4_ib_qp *qp; + struct mlx4_ib_create_wq ucmd; + int err, required_cmd_sz; + + if (!(udata && pd->uobject)) + return ERR_PTR(-EINVAL); + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) { + pr_debug("invalid inlen\n"); + return ERR_PTR(-EINVAL); + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + pr_debug("inlen is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (udata->outlen) + return ERR_PTR(-EOPNOTSUPP); + + dev = to_mdev(pd->device); + + if (init_attr->wq_type != IB_WQT_RQ) { + pr_debug("unsupported wq type %d\n", init_attr->wq_type); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->create_flags) { + pr_debug("unsupported create_flags %u\n", + init_attr->create_flags); + return ERR_PTR(-EOPNOTSUPP); + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + + memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr)); + ib_qp_init_attr.qp_context = init_attr->wq_context; + ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET; + ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr; + ib_qp_init_attr.cap.max_recv_sge = init_attr->max_sge; + ib_qp_init_attr.recv_cq = init_attr->cq; + ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */ + + err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr, + udata, 0, &qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibwq.event_handler = init_attr->event_handler; + qp->ibwq.wq_num = qp->mqp.qpn; + qp->ibwq.state = IB_WQS_RESET; + + return &qp->ibwq; +} + +static int ib_wq2qp_state(enum ib_wq_state state) +{ + switch (state) { + case IB_WQS_RESET: + return IB_QPS_RESET; + case IB_WQS_RDY: + return IB_QPS_RTR; + default: + return IB_QPS_ERR; + } +} + +static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state) +{ + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + enum ib_qp_state qp_cur_state; + enum ib_qp_state qp_new_state; + int attr_mask; + int err; + + /* ib_qp.state represents the WQ HW state while ib_wq.state represents + * the WQ logic state. + */ + qp_cur_state = qp->state; + qp_new_state = ib_wq2qp_state(new_state); + + if (ib_wq2qp_state(new_state) == qp_cur_state) + return 0; + + if (new_state == IB_WQS_RDY) { + struct ib_qp_attr attr = {}; + + attr.port_num = qp->port; + attr_mask = IB_QP_PORT; + + err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr, + attr_mask, IB_QPS_RESET, IB_QPS_INIT); + if (err) { + pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n", + ibwq->wq_num); + return err; + } + + qp_cur_state = IB_QPS_INIT; + } + + attr_mask = 0; + err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask, + qp_cur_state, qp_new_state); + + if (err && (qp_cur_state == IB_QPS_INIT)) { + qp_new_state = IB_QPS_RESET; + if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, + attr_mask, IB_QPS_INIT, IB_QPS_RESET)) { + pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n", + ibwq->wq_num); + qp_new_state = IB_QPS_INIT; + } + } + + qp->state = qp_new_state; + + return err; +} + +int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + struct mlx4_ib_modify_wq ucmd = {}; + size_t required_cmd_sz; + enum ib_wq_state cur_state, new_state; + int err = 0; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) + return -EFAULT; + + if (ucmd.comp_mask || ucmd.reserved) + return -EOPNOTSUPP; + + if (wq_attr_mask & IB_WQ_FLAGS) + return -EOPNOTSUPP; + + cur_state = wq_attr_mask & IB_WQ_CUR_STATE ? wq_attr->curr_wq_state : + ibwq->state; + new_state = wq_attr_mask & IB_WQ_STATE ? wq_attr->wq_state : cur_state; + + if (cur_state < IB_WQS_RESET || cur_state > IB_WQS_ERR || + new_state < IB_WQS_RESET || new_state > IB_WQS_ERR) + return -EINVAL; + + if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR)) + return -EINVAL; + + if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET)) + return -EINVAL; + + /* Can update HW state only if a RSS QP has already associated to this + * WQ, so we can apply its port on the WQ. + */ + if (qp->rss_usecnt) + err = _mlx4_ib_modify_wq(ibwq, new_state); + + if (!err) + ibwq->state = new_state; + + return err; +} + +int mlx4_ib_destroy_wq(struct ib_wq *ibwq) +{ + struct mlx4_ib_dev *dev = to_mdev(ibwq->device); + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + + if (qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); + + destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, 1); + + kfree(qp); + + return 0; +} diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index bf3bdba2f326..c9702a5f0bda 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -105,4 +105,18 @@ struct mlx4_ib_create_qp { __u8 reserved; }; +struct mlx4_ib_create_wq { + __u64 buf_addr; + __u64 db_addr; + __u8 log_range_size; + __u8 reserved[3]; + __u32 comp_mask; + __u32 reserved1; +}; + +struct mlx4_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From b8d46ca035060e70f5f0da849d86720752d5aa17 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:25 +0300 Subject: IB/mlx4: Add support for WQ indirection table related verbs To enable RSS functionality the IB indirection table object (i.e. ib_rwq_ind_table) should be used. This patch implements the related verbs as of create and destroy an indirection table. In downstream patches the indirection table will be used as part of RSS QP creation. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 12 +++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 6 ++++ drivers/infiniband/hw/mlx4/qp.c | 70 ++++++++++++++++++++++++++++++++++++ include/uapi/rdma/mlx4-abi.h | 4 +++ 4 files changed, 89 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7c6f929ebd3e..b42234571a8c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2738,10 +2738,16 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.create_wq = mlx4_ib_create_wq; ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; + ibdev->ib_dev.create_rwq_ind_table = + mlx4_ib_create_rwq_ind_table; + ibdev->ib_dev.destroy_rwq_ind_table = + mlx4_ib_destroy_rwq_ind_table; ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); } if (!mlx4_is_slave(ibdev->dev)) { diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ba4e78c064d2..85525bc400a0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -916,4 +916,10 @@ int mlx4_ib_destroy_wq(struct ib_wq *wq); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); +struct ib_rwq_ind_table +*mlx4_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 65e4ec549368..519919d15474 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3943,3 +3943,73 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq) return 0; } + +struct ib_rwq_ind_table +*mlx4_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ib_rwq_ind_table *rwq_ind_table; + struct mlx4_ib_create_rwq_ind_tbl_resp resp = {}; + unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size; + unsigned int base_wqn; + size_t min_resp_len; + int i; + int err; + + if (udata->inlen > 0 && + !ib_is_udata_cleared(udata, 0, + udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + if (ind_tbl_size > + device->attrs.rss_caps.max_rwq_indirection_table_size) { + pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n", + ind_tbl_size, + device->attrs.rss_caps.max_rwq_indirection_table_size); + return ERR_PTR(-EINVAL); + } + + base_wqn = init_attr->ind_tbl[0]->wq_num; + + if (base_wqn % ind_tbl_size) { + pr_debug("WQN=0x%x isn't aligned with indirection table size\n", + base_wqn); + return ERR_PTR(-EINVAL); + } + + for (i = 1; i < ind_tbl_size; i++) { + if (++base_wqn != init_attr->ind_tbl[i]->wq_num) { + pr_debug("indirection table's WQNs aren't consecutive\n"); + return ERR_PTR(-EINVAL); + } + } + + rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL); + if (!rwq_ind_table) + return ERR_PTR(-ENOMEM); + + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err; + } + + return rwq_ind_table; + +err: + kfree(rwq_ind_table); + return ERR_PTR(err); +} + +int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + kfree(ib_rwq_ind_tbl); + return 0; +} diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index c9702a5f0bda..5591d955ba00 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -119,4 +119,8 @@ struct mlx4_ib_modify_wq { __u32 reserved; }; +struct mlx4_ib_create_rwq_ind_tbl_resp { + __u32 response_length; + __u32 reserved; +}; #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From 3078f5f1bd8b6c8aef77b8ef4d49671fa6eb058e Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:26 +0300 Subject: IB/mlx4: Add support for RSS QP Add support to work with a RSS QP by using an indirection table object upon QP creation. Other related QP verbs (e.g. modify/destroy/query) were updated as well for that QP mode. Notes: - The RX hash properties are supplied as driver private data. - The RSS QP port is used on the associated WQs in its indirection table. Applying different ports during WQ life time is not allowed. - The expected RSS QP flow is: create, modify(RST->INIT), modify(RST->RTR), destroy. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 + drivers/infiniband/hw/mlx4/qp.c | 453 +++++++++++++++++++++++++++++++++-- include/uapi/rdma/mlx4-abi.h | 33 +++ 3 files changed, 472 insertions(+), 22 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 85525bc400a0..1fa19820355a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -46,6 +46,7 @@ #include #include +#include #define MLX4_IB_DRV_NAME "mlx4_ib" @@ -299,6 +300,12 @@ struct mlx4_wqn_range { struct list_head list; }; +struct mlx4_ib_rss { + unsigned int base_qpn_tbl_sz; + u8 flags; + u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; +}; + struct mlx4_ib_qp { union { struct ib_qp ibqp; @@ -345,6 +352,7 @@ struct mlx4_ib_qp { struct mlx4_wqn_range *wqn_range; /* Number of RSS QP parents that uses this WQ */ u32 rss_usecnt; + struct mlx4_ib_rss *rss_ctx; }; struct mlx4_ib_srq { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 519919d15474..e42acfb20588 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -53,6 +53,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); +static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state); enum { MLX4_IB_ACK_REQ_FREQ = 8, @@ -650,6 +651,212 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, qp->counter_index = NULL; } +static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, + struct ib_qp_init_attr *init_attr, + struct mlx4_ib_create_qp_rss *ucmd) +{ + rss_ctx->base_qpn_tbl_sz = init_attr->rwq_ind_tbl->ind_tbl[0]->wq_num | + (init_attr->rwq_ind_tbl->log_ind_tbl_size << 24); + + if ((ucmd->rx_hash_function == MLX4_IB_RX_HASH_FUNC_TOEPLITZ) && + (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) { + memcpy(rss_ctx->rss_key, ucmd->rx_hash_key, + MLX4_EN_RSS_KEY_SIZE); + } else { + pr_debug("RX Hash function is not supported\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { + rss_ctx->flags = MLX4_RSS_IPV4; + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { + pr_debug("RX Hash fields_mask is not supported - both IPv4 SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) { + rss_ctx->flags |= MLX4_RSS_IPV6; + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) { + pr_debug("RX Hash fields_mask is not supported - both IPv6 SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) { + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) { + pr_debug("RX Hash fields_mask for UDP is not supported\n"); + return (-EOPNOTSUPP); + } + + if (rss_ctx->flags & MLX4_RSS_IPV4) { + rss_ctx->flags |= MLX4_RSS_UDP_IPV4; + } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + rss_ctx->flags |= MLX4_RSS_UDP_IPV6; + } else { + pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n"); + return (-EOPNOTSUPP); + } + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) { + pr_debug("RX Hash fields_mask is not supported - both UDP SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { + if (rss_ctx->flags & MLX4_RSS_IPV4) { + rss_ctx->flags |= MLX4_RSS_TCP_IPV4; + } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + rss_ctx->flags |= MLX4_RSS_TCP_IPV6; + } else { + pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n"); + return (-EOPNOTSUPP); + } + + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { + pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + return 0; +} + +static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct mlx4_ib_create_qp_rss *ucmd, + struct mlx4_ib_qp *qp) +{ + int qpn; + int err; + + qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; + + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 0, qp->mqp.usage); + if (err) + return err; + + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + if (err) + goto err_qpn; + + mutex_init(&qp->mutex); + + INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); + + qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_ETHERTYPE; + qp->state = IB_QPS_RESET; + + /* Set dummy send resources to be compatible with HV and PRM */ + qp->sq_no_prefetch = 1; + qp->sq.wqe_cnt = 1; + qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; + qp->buf_size = qp->sq.wqe_cnt << MLX4_IB_MIN_SQ_STRIDE; + qp->mtt = (to_mqp( + (struct ib_qp *)init_attr->rwq_ind_tbl->ind_tbl[0]))->mtt; + + qp->rss_ctx = kzalloc(sizeof(*qp->rss_ctx), GFP_KERNEL); + if (!qp->rss_ctx) { + err = -ENOMEM; + goto err_qp_alloc; + } + + err = set_qp_rss(dev, qp->rss_ctx, init_attr, ucmd); + if (err) + goto err; + + return 0; + +err: + kfree(qp->rss_ctx); + +err_qp_alloc: + mlx4_qp_remove(dev->dev, &qp->mqp); + mlx4_qp_free(dev->dev, &qp->mqp); + +err_qpn: + mlx4_qp_release_range(dev->dev, qpn, 1); + return err; +} + +static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_qp *qp; + struct mlx4_ib_create_qp_rss ucmd = {}; + size_t required_cmd_sz; + int err; + + if (!udata) { + pr_debug("RSS QP with NULL udata\n"); + return ERR_PTR(-EINVAL); + } + + if (udata->outlen) + return ERR_PTR(-EOPNOTSUPP); + + required_cmd_sz = offsetof(typeof(ucmd), reserved1) + + sizeof(ucmd.reserved1); + if (udata->inlen < required_cmd_sz) { + pr_debug("invalid inlen\n"); + return ERR_PTR(-EINVAL); + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + pr_debug("copy failed\n"); + return ERR_PTR(-EFAULT); + } + + if (ucmd.comp_mask || ucmd.reserved1) + return ERR_PTR(-EOPNOTSUPP); + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + pr_debug("inlen is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + pr_debug("RSS QP with unsupported QP type %d\n", + init_attr->qp_type); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->create_flags) { + pr_debug("RSS QP doesn't support create flags\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->send_cq || init_attr->cap.max_send_wr) { + pr_debug("RSS QP with unsupported send attributes\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + + err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibqp.qp_num = qp->mqp.qpn; + + return &qp->ibqp; +} + /* * This function allocates a WQN from a range which is consecutive and aligned * to its size. In case the range is full, then it creates a new range and @@ -1186,6 +1393,36 @@ static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, } } +static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + if (qp->state != IB_QPS_RESET) { + int i; + + for (i = 0; i < (1 << qp->ibqp.rwq_ind_tbl->log_ind_tbl_size); + i++) { + struct ib_wq *ibwq = qp->ibqp.rwq_ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } + + if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), + MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) + pr_warn("modify QP %06x to RESET failed.\n", + qp->mqp.qpn); + } + + mlx4_qp_remove(dev->dev, &qp->mqp); + mlx4_qp_free(dev->dev, &qp->mqp); + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + del_gid_entries(qp); + kfree(qp->rss_ctx); +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, int is_user) { @@ -1303,6 +1540,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; + if (init_attr->rwq_ind_tbl) + return _mlx4_ib_create_qp_rss(pd, init_attr, udata); + /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1444,7 +1684,6 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); - struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); @@ -1459,8 +1698,14 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (mqp->counter_index) mlx4_ib_free_qp_counter(dev, mqp); - pd = get_pd(mqp); - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + if (qp->rwq_ind_tbl) { + destroy_qp_rss(dev, mqp); + } else { + struct mlx4_ib_pd *pd; + + pd = get_pd(mqp); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + } if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -1783,12 +2028,116 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) } } +/* + * Go over all RSS QP's childes (WQs) and apply their HW state according to + * their logic state if the RSS QP is the first RSS QP associated for the WQ. + */ +static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num) +{ + int i; + int err; + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + /* Mlx4_ib restrictions: + * WQ's is associated to a port according to the RSS QP it is + * associates to. + * In case the WQ is associated to a different port by another + * RSS QP, return a failure. + */ + if ((wq->rss_usecnt > 0) && (wq->port != port_num)) { + err = -EINVAL; + mutex_unlock(&wq->mutex); + break; + } + wq->port = port_num; + if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) { + err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY); + if (err) { + mutex_unlock(&wq->mutex); + break; + } + } + wq->rss_usecnt++; + + mutex_unlock(&wq->mutex); + } + + if (i && err) { + int j; + + for (j = (i - 1); j >= 0; j--) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[j]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + if ((wq->rss_usecnt == 1) && + (ibwq->state == IB_WQS_RDY)) + if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET)) + pr_warn("failed to reverse WQN=0x%06x\n", + ibwq->wq_num); + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } + } + + return err; +} + +static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl) +{ + int i; + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY)) + if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET)) + pr_warn("failed to reverse WQN=%x\n", + ibwq->wq_num); + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } +} + +static void fill_qp_rss_context(struct mlx4_qp_context *context, + struct mlx4_ib_qp *qp) +{ + struct mlx4_rss_context *rss_context; + + rss_context = (void *)context + offsetof(struct mlx4_qp_context, + pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; + + rss_context->base_qpn = cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz); + rss_context->default_qpn = + cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz & 0xffffff); + if (qp->rss_ctx->flags & (MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6)) + rss_context->base_qpn_udp = rss_context->default_qpn; + rss_context->flags = qp->rss_ctx->flags; + /* Currently support just toeplitz */ + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + + memcpy(rss_context->rss_key, qp->rss_ctx->rss_key, + MLX4_EN_RSS_KEY_SIZE); +} + static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { struct ib_uobject *ibuobject; struct ib_srq *ibsrq; + struct ib_rwq_ind_table *rwq_ind_tbl; enum ib_qp_type qp_type; struct mlx4_ib_dev *dev; struct mlx4_ib_qp *qp; @@ -1804,23 +2153,25 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (src_type == MLX4_IB_RWQ_SRC) { struct ib_wq *ibwq; - ibwq = (struct ib_wq *)src; - ibuobject = ibwq->uobject; - ibsrq = NULL; - qp_type = IB_QPT_RAW_PACKET; - qp = to_mqp((struct ib_qp *)ibwq); - dev = to_mdev(ibwq->device); - pd = to_mpd(ibwq->pd); + ibwq = (struct ib_wq *)src; + ibuobject = ibwq->uobject; + ibsrq = NULL; + rwq_ind_tbl = NULL; + qp_type = IB_QPT_RAW_PACKET; + qp = to_mqp((struct ib_qp *)ibwq); + dev = to_mdev(ibwq->device); + pd = to_mpd(ibwq->pd); } else { struct ib_qp *ibqp; - ibqp = (struct ib_qp *)src; - ibuobject = ibqp->uobject; - ibsrq = ibqp->srq; - qp_type = ibqp->qp_type; - qp = to_mqp(ibqp); - dev = to_mdev(ibqp->device); - pd = get_pd(qp); + ibqp = (struct ib_qp *)src; + ibuobject = ibqp->uobject; + ibsrq = ibqp->srq; + rwq_ind_tbl = ibqp->rwq_ind_tbl; + qp_type = ibqp->qp_type; + qp = to_mqp(ibqp); + dev = to_mdev(ibqp->device); + pd = get_pd(qp); } /* APM is not supported under RoCE */ @@ -1836,6 +2187,11 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); + if (rwq_ind_tbl) { + fill_qp_rss_context(context, qp); + context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET); + } + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); else { @@ -1876,9 +2232,11 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, ilog2(dev->dev->caps.max_msg_sz); } - if (qp->rq.wqe_cnt) - context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; - context->rq_size_stride |= qp->rq.wqe_shift - 4; + if (!rwq_ind_tbl) { /* PRM RSS receive side should be left zeros */ + if (qp->rq.wqe_cnt) + context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; + context->rq_size_stride |= qp->rq.wqe_shift - 4; + } if (qp->sq.wqe_cnt) context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; @@ -2031,8 +2389,14 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - get_cqs(qp, src_type, &send_cq, &recv_cq); - context->pd = cpu_to_be32(pd->pdn); + context->pd = cpu_to_be32(pd->pdn); + + if (!rwq_ind_tbl) { + get_cqs(qp, src_type, &send_cq, &recv_cq); + } else { /* Set dummy CQs to be compatible with HV and PRM */ + send_cq = to_mcq(rwq_ind_tbl->ind_tbl[0]->cq); + recv_cq = send_cq; + } context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); @@ -2358,6 +2722,11 @@ out: return err; } +enum { + MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK = (IB_QP_STATE | + IB_QP_PORT), +}; + static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -2388,6 +2757,27 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (ibqp->rwq_ind_tbl) { + if (!(((cur_state == IB_QPS_RESET) && + (new_state == IB_QPS_INIT)) || + ((cur_state == IB_QPS_INIT) && + (new_state == IB_QPS_RTR)))) { + pr_debug("qpn 0x%x: RSS QP unsupported transition %d to %d\n", + ibqp->qp_num, cur_state, new_state); + + err = -EOPNOTSUPP; + goto out; + } + + if (attr_mask & ~MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK) { + pr_debug("qpn 0x%x: RSS QP unsupported attribute mask 0x%x for transition %d to %d\n", + ibqp->qp_num, attr_mask, cur_state, new_state); + + err = -EOPNOTSUPP; + goto out; + } + } + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) { if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { if ((ibqp->qp_type == IB_QPT_RC) || @@ -2452,9 +2842,18 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) { + err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num); + if (err) + goto out; + } + err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask, cur_state, new_state); + if (ibqp->rwq_ind_tbl && err) + bring_down_rss_rwqs(ibqp->rwq_ind_tbl); + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; @@ -3643,6 +4042,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr int mlx4_state; int err = 0; + if (ibqp->rwq_ind_tbl) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (qp->state == IB_QPS_RESET) { @@ -3917,6 +4319,11 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET)) return -EINVAL; + /* Need to protect against the parent RSS which also may modify WQ + * state. + */ + mutex_lock(&qp->mutex); + /* Can update HW state only if a RSS QP has already associated to this * WQ, so we can apply its port on the WQ. */ @@ -3926,6 +4333,8 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, if (!err) ibwq->state = new_state; + mutex_unlock(&qp->mutex); + return err; } diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 5591d955ba00..d915cab37ec3 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -95,6 +95,16 @@ struct mlx4_ib_create_srq_resp { __u32 reserved; }; +struct mlx4_ib_create_qp_rss { + __u64 rx_hash_fields_mask; + __u8 rx_hash_function; + __u8 rx_key_len; + __u8 reserved[6]; + __u8 rx_hash_key[40]; + __u32 comp_mask; + __u32 reserved1; +}; + struct mlx4_ib_create_qp { __u64 buf_addr; __u64 db_addr; @@ -123,4 +133,27 @@ struct mlx4_ib_create_rwq_ind_tbl_resp { __u32 response_length; __u32 reserved; }; + +/* RX Hash function flags */ +enum mlx4_ib_rx_hash_function_flags { + MLX4_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags, these flags allows to set which incoming packet's field should + * participates in RX Hash. Each flag represent certain packet's field, + * when the flag is set the field that is represented by the flag will + * participate in RX Hash calculation. + */ +enum mlx4_ib_rx_hash_fields { + MLX4_IB_RX_HASH_SRC_IPV4 = 1 << 0, + MLX4_IB_RX_HASH_DST_IPV4 = 1 << 1, + MLX4_IB_RX_HASH_SRC_IPV6 = 1 << 2, + MLX4_IB_RX_HASH_DST_IPV6 = 1 << 3, + MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4, + MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5, + MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6, + MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From ad84dad2160d5f36bb471b391462d651c887d693 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:05 +0300 Subject: RDMA/qedr: notify user application if DPM is supported Direct Packet Mode support may be disabled, e.g, due to limited resources. Notifying the user application prevents wasting cycles on attempting to send these kind of packets. Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/qedr.h | 2 ++ drivers/infiniband/hw/qedr/verbs.c | 1 + include/uapi/rdma/qedr-abi.h | 1 + 4 files changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 0ae30f5c8cbc..199b6edbef92 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -777,6 +777,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, if (rc) goto init_err; + dev->user_dpm_enabled = dev_info.user_dpm_enabled; dev->num_hwfns = dev_info.common.num_hwfns; dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 392e76e26840..b2bb42e2805d 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -162,6 +162,8 @@ struct qedr_dev { struct qedr_qp *gsi_qp; unsigned long enet_state; + + u8 user_dpm_enabled; }; #define QEDR_MAX_SQ_PBL (0x8000) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 2ae71b8f1ba8..4322ee00498e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -376,6 +376,7 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, memset(&uresp, 0, sizeof(uresp)); + uresp.dpm_enabled = dev->user_dpm_enabled; uresp.db_pa = ctx->dpi_phys_addr; uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 75c270d839c8..2684004ec4fd 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -49,6 +49,7 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_recv_wr; __u32 sges_per_srq_wr; __u32 max_cqes; + __u8 dpm_enabled; }; struct qedr_alloc_pd_ureq { -- cgit v1.2.3 From 67cbe3532c2cd84303a2073cedad6b8bcad13be3 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:06 +0300 Subject: RDMA/qedr: notify user application of supported WIDs The number of supported WIDs, if they are supported at all, can be limited due to resources. Notifying the user space application the number of available WIDs allows it to utilize them correctly. Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 ++ include/uapi/rdma/qedr-abi.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4322ee00498e..9ee2dce3e5bb 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -377,6 +377,8 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, memset(&uresp, 0, sizeof(uresp)); uresp.dpm_enabled = dev->user_dpm_enabled; + uresp.wids_enabled = 1; + uresp.wid_count = oparams.wid_count; uresp.db_pa = ctx->dpi_phys_addr; uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 2684004ec4fd..54b64357ab24 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -50,6 +50,8 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_srq_wr; __u32 max_cqes; __u8 dpm_enabled; + __u8 wids_enabled; + __u16 wid_count; }; struct qedr_alloc_pd_ureq { -- cgit v1.2.3 From 1a6e7c31d71db34d1b9bc3acc87eaea6c2ecc997 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 07:55:53 +0300 Subject: RDMA/netlink: Add netlink device definitions to UAPI Introduce new defines to rdma_netlink.h, so the RDMA configuration tool will be able to communicate with RDMA subsystem by using the shared defines. The addition of new client (NLDEV) revealed the fact that we exposed by mistake the RDMA_NL_I40IW define which is not backed by any RDMA netlink by now and it won't be exposed in the future too. So this patch reuses the value and deletes the old defines. The NLDEV operates with objects. The struct ib_device has two straightforward objects: device itself and ports of that device. This brings us to propose the following commands to work on those objects: * RDMA_NLDEV_CMD_{GET,SET,NEW,DEL} - works on ib_device itself * RDMA_NLDEV_CMD_PORT_{GET,SET,NEW,DEL} - works on ports of specific ib_device Those commands receive/return the device index (RDMA_NLDEV_ATTR_DEV_INDEX) and port index (RDMA_NLDEV_ATTR_PORT_INDEX). For device object accesses, the RDMA_NLDEV_ATTR_PORT_INDEX will return the maximum number of ports for specific ib_device and for port access the actual port index. The port index starts from 1 to follow RDMA/core internal semantics and the sysfs exposed knobs. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/netlink.c | 2 +- include/uapi/rdma/rdma_netlink.h | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index cd692bd73793..27352a352770 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -61,7 +61,7 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) RDMA_NL_IWPM_NUM_OPS, 0, RDMA_NL_LS_NUM_OPS, - 0 }; + RDMA_NLDEV_NUM_OPS }; /* * This BUILD_BUG_ON is intended to catch addition of new diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 02fe8390c18f..a44229fa5eca 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -8,7 +8,7 @@ enum { RDMA_NL_IWCM, RDMA_NL_RSVD, RDMA_NL_LS, /* RDMA Local Services */ - RDMA_NL_I40IW, + RDMA_NL_NLDEV, /* RDMA device interface */ RDMA_NL_NUM_CLIENTS }; @@ -222,4 +222,41 @@ struct rdma_nla_ls_gid { __u8 gid[16]; }; +enum rdma_nldev_command { + RDMA_NLDEV_CMD_UNSPEC, + + RDMA_NLDEV_CMD_GET, /* can dump */ + RDMA_NLDEV_CMD_SET, + RDMA_NLDEV_CMD_NEW, + RDMA_NLDEV_CMD_DEL, + + RDMA_NLDEV_CMD_PORT_GET, /* can dump */ + RDMA_NLDEV_CMD_PORT_SET, + RDMA_NLDEV_CMD_PORT_NEW, + RDMA_NLDEV_CMD_PORT_DEL, + + RDMA_NLDEV_NUM_OPS +}; + +enum rdma_nldev_attr { + /* don't change the order or add anything between, this is ABI! */ + RDMA_NLDEV_ATTR_UNSPEC, + + /* Identifier for ib_device */ + RDMA_NLDEV_ATTR_DEV_INDEX, /* u32 */ + + RDMA_NLDEV_ATTR_DEV_NAME, /* string */ + /* + * Device index together with port index are identifiers + * for port/link properties. + * + * For RDMA_NLDEV_CMD_GET commamnd, port index will return number + * of available ports in ib_device, while for port specific operations, + * it will be real port index as it appears in sysfs. Port index follows + * sysfs notation and starts from 1 for the first port. + */ + RDMA_NLDEV_ATTR_PORT_INDEX, /* u32 */ + + RDMA_NLDEV_ATTR_MAX +}; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From ac50525374315b9b609747f83b07f8dccb06b722 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 14:47:08 +0300 Subject: RDMA/netlink: Expose device and port capability masks The port capability mask is exposed to user space via sysfs interface, while device capabilities are available for verbs only. This patch provides those capabilities through netlink interface. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/nldev.c | 19 +++++++++++++++++++ include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 24 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index db9d9ffc1415..94c1e49074f5 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -50,18 +50,37 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) return -EMSGSIZE; + + BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + device->attrs.device_cap_flags, 0)) + return -EMSGSIZE; + return 0; } static int fill_port_info(struct sk_buff *msg, struct ib_device *device, u32 port) { + struct ib_port_attr attr; + int ret; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) return -EMSGSIZE; + + ret = ib_query_port(device, port, &attr); + if (ret) + return ret; + + BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + (u64)attr.port_cap_flags, 0)) + return -EMSGSIZE; + return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index a44229fa5eca..90de11db6580 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -257,6 +257,11 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_PORT_INDEX, /* u32 */ + /* + * Device and port capabilities + */ + RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 8621a7e3c1c22e18385c9ced1647363884ea2aa1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jun 2017 16:58:59 +0300 Subject: RDMA/netlink: Export FW version Add FW version to the device properties exported by RDMA netlink, to be used by RDMAtool. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 9 +++++++++ include/uapi/rdma/rdma_netlink.h | 4 ++++ 2 files changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 94c1e49074f5..cdc970ca5a1b 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -40,10 +40,14 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, .len = IB_DEVICE_NAME_MAX - 1}, [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, + .len = IB_FW_VERSION_NAME_MAX - 1}, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) { + char fw[IB_FW_VERSION_NAME_MAX]; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) @@ -56,6 +60,11 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) device->attrs.device_cap_flags, 0)) return -EMSGSIZE; + ib_get_device_fw_str(device, fw); + /* Device without FW has strlen(fw) */ + if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) + return -EMSGSIZE; + return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 90de11db6580..5159858730b0 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -262,6 +262,10 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */ + /* + * FW version + */ + RDMA_NLDEV_ATTR_FW_VERSION, /* string */ RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 1aaff896ca6b968a639e3e1e72ba6146ba332501 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 14:01:37 +0300 Subject: RDMA/netlink: Export node_guid and sys_image_guid Add Node GUID and system image GUID to the device properties exported by RDMA netlink, to be used by RDMAtool. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 8 ++++++++ include/uapi/rdma/rdma_netlink.h | 13 +++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index cdc970ca5a1b..f932c2c3fad0 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -42,6 +42,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, .len = IB_FW_VERSION_NAME_MAX - 1}, + [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -65,6 +67,12 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID, + be64_to_cpu(device->node_guid), 0)) + return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, + be64_to_cpu(device->attrs.sys_image_guid), 0)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5159858730b0..fe3a7429e7a1 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -266,6 +266,19 @@ enum rdma_nldev_attr { * FW version */ RDMA_NLDEV_ATTR_FW_VERSION, /* string */ + + /* + * Node GUID (in host byte order) associated with the RDMA device. + */ + RDMA_NLDEV_ATTR_NODE_GUID, /* u64 */ + + /* + * System image GUID (in host byte order) associated with + * this RDMA device and other devices which are part of a + * single system. + */ + RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 12026fbba6af2fc53c3c6cf88bdfc6561986ba82 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:05:14 +0300 Subject: RDMA/netlink: Advertise IB subnet prefix Add IB subnet prefix to the port properties exported by RDMA netlink. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 5 +++++ include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f932c2c3fad0..7af71d5e52c8 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -44,6 +44,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { .len = IB_FW_VERSION_NAME_MAX - 1}, [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -97,6 +98,10 @@ static int fill_port_info(struct sk_buff *msg, if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, (u64)attr.port_cap_flags, 0)) return -EMSGSIZE; + if (rdma_protocol_ib(device, port) && + nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, + attr.subnet_prefix, 0)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index fe3a7429e7a1..481003182a35 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -279,6 +279,11 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, /* u64 */ + /* + * Subnet prefix (in host byte order) + */ + RDMA_NLDEV_ATTR_SUBNET_PREFIX, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 80a06dd36f79de7007f21f5cbe42181a4e5c7d6d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:38:36 +0300 Subject: RDMA/netink: Export lids and sm_lids According to the IB specification, the LID and SM_LID are 16-bit wide, but to support OmniPath users, export it as 32-bit value from the beginning. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 9 ++++++++- include/uapi/rdma/rdma_netlink.h | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7af71d5e52c8..16f1d28bea69 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -45,6 +45,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -102,7 +104,12 @@ static int fill_port_info(struct sk_buff *msg, nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, attr.subnet_prefix, 0)) return -EMSGSIZE; - + if (rdma_protocol_ib(device, port)) { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) + return -EMSGSIZE; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) + return -EMSGSIZE; + } return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 481003182a35..7d5caaf54126 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -284,6 +284,14 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_SUBNET_PREFIX, /* u64 */ + /* + * Local Identifier (LID), + * According to IB specification, It is 16-bit address assigned + * by the Subnet Manager. Extended to be 32-bit for OmniPath users. + */ + RDMA_NLDEV_ATTR_LID, /* u32 */ + RDMA_NLDEV_ATTR_SM_LID, /* u32 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 34840fea112d36507c19dc6052b8c6d88bdd9c16 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:49:30 +0300 Subject: RDMA/netlink: Export LID mask control (LMC) Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 3 +++ include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 16f1d28bea69..11546f87c5dc 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -47,6 +47,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -109,6 +110,8 @@ static int fill_port_info(struct sk_buff *msg, return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) return -EMSGSIZE; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) + return -EMSGSIZE; } return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 7d5caaf54126..035706e6b016 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -292,6 +292,11 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_LID, /* u32 */ RDMA_NLDEV_ATTR_SM_LID, /* u32 */ + /* + * LID mask control (LMC) + */ + RDMA_NLDEV_ATTR_LMC, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 5654e49db0b2d87c12b6e120b6a830abe3d3921b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Jun 2017 13:12:45 +0300 Subject: RDMA/netlink: Provide port state and physical link state Add port state and physical link state to the users of RDMA netlink. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 6 ++++++ include/uapi/rdma/rdma_netlink.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 11546f87c5dc..32ccb2b88933 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -48,6 +48,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -113,6 +115,10 @@ static int fill_port_info(struct sk_buff *msg, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) return -EMSGSIZE; } + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state)) + return -EMSGSIZE; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 035706e6b016..c488c3cf361b 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -297,6 +297,9 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_LMC, /* u8 */ + RDMA_NLDEV_ATTR_PORT_STATE, /* u8 */ + RDMA_NLDEV_ATTR_PORT_PHYS_STATE, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 1bb77b8c1d57149ed0aa6825255ead80ae584034 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Jun 2017 16:01:29 +0300 Subject: RDMA/netlink: Export node_type Add ability to get node_type for RDAM netlink users. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 3 +++ include/uapi/rdma/rdma_netlink.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 32ccb2b88933..474022274e09 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -50,6 +50,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -79,6 +80,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, be64_to_cpu(device->attrs.sys_image_guid), 0)) return -EMSGSIZE; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index c488c3cf361b..861440a87e7c 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -300,6 +300,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_PORT_STATE, /* u8 */ RDMA_NLDEV_ATTR_PORT_PHYS_STATE, /* u8 */ + RDMA_NLDEV_ATTR_DEV_NODE_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 55f2467cd717241dd941153d4db1e23c91aecf98 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 17 Aug 2017 15:50:35 +0300 Subject: RDMA/mlx4: Fix create qp command alignment Avoid extra padding by replacing the order of inl_recv_sz and reserved, otherwise 'mlx4_ib_create_qp' structure might be larger than legacy user input leading to copy of some garbage data from the user space buffer. Fixes: ea30b966f7dd ('IB/mlx4: Add inline-receive support') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index d915cab37ec3..21cce1a4c4dd 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -111,8 +111,8 @@ struct mlx4_ib_create_qp { __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; - __u32 inl_recv_sz; __u8 reserved; + __u32 inl_recv_sz; }; struct mlx4_ib_create_wq { -- cgit v1.2.3 From 078b3573030346df0cdc46d798c0f434dc53c2cc Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:47 +0300 Subject: IB/mlx4: Fix struct mlx4_ib_create_wq alignment The mlx4 ABI defines to have structures with alignment of 64B. Fixes: 400b1ebcfe31 ("IB/mlx4: Add support for WQ related verbs") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 9 ++++----- include/uapi/rdma/mlx4-abi.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0d2923c2225f..c3958fcfed75 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1046,9 +1046,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (src == MLX4_IB_RWQ_SRC) { - if (ucmd.wq.comp_mask || ucmd.wq.reserved1 || - ucmd.wq.reserved[0] || ucmd.wq.reserved[1] || - ucmd.wq.reserved[2]) { + if (ucmd.wq.comp_mask || ucmd.wq.reserved[0] || + ucmd.wq.reserved[1] || ucmd.wq.reserved[2]) { pr_debug("user command isn't supported\n"); err = -EOPNOTSUPP; goto err; @@ -4146,8 +4145,8 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (!(udata && pd->uobject)) return ERR_PTR(-EINVAL); - required_cmd_sz = offsetof(typeof(ucmd), reserved) + - sizeof(ucmd.reserved); + required_cmd_sz = offsetof(typeof(ucmd), comp_mask) + + sizeof(ucmd.comp_mask); if (udata->inlen < required_cmd_sz) { pr_debug("invalid inlen\n"); return ERR_PTR(-EINVAL); diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 21cce1a4c4dd..0e10102861b5 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -121,7 +121,6 @@ struct mlx4_ib_create_wq { __u8 log_range_size; __u8 reserved[3]; __u32 comp_mask; - __u32 reserved1; }; struct mlx4_ib_modify_wq { -- cgit v1.2.3 From b23673f86fd0d9ccbc088e88e29899b4d3f0f055 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:48 +0300 Subject: IB/mlx4: Remove redundant attribute in mlx4_ib_create_qp_rss struct rx_key_len is not in use and needs to be removed. Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 0e10102861b5..c55f60e05f86 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -98,8 +98,7 @@ struct mlx4_ib_create_srq_resp { struct mlx4_ib_create_qp_rss { __u64 rx_hash_fields_mask; __u8 rx_hash_function; - __u8 rx_key_len; - __u8 reserved[6]; + __u8 reserved[7]; __u8 rx_hash_key[40]; __u32 comp_mask; __u32 reserved1; -- cgit v1.2.3 From 96dc3fc5f1d66b20cdf839d571c7b907e08d5d00 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 17 Aug 2017 15:52:28 +0300 Subject: IB/mlx5: Expose software parsing for Raw Ethernet QP Software parsing (SWP) is a feature that can be used to instruct the device to stop using its internal parser and to parse packets on the transmit path according to offsets set for each packets. Through this feature, the device allows the handling of checksum and LSO by the hardware according to the location of IP and TCP/UDP headers. Enable SW parsing on Raw Ethernet send queue by default if firmware supports it and report these capabilities to user space. Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 3 +++ include/uapi/rdma/mlx5-abi.h | 17 +++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 762ef6bf219e..07789450ec42 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -811,6 +811,27 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), reserved, uhw->outlen)) resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), sw_parsing_caps, + uhw->outlen)) { + resp.response_length += sizeof(resp.sw_parsing_caps); + if (MLX5_CAP_ETH(mdev, swp)) { + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING; + + if (MLX5_CAP_ETH(mdev, swp_csum)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_CSUM; + + if (MLX5_CAP_ETH(mdev, swp_lso)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_LSO; + + if (resp.sw_parsing_caps.sw_parsing_offloads) + resp.sw_parsing_caps.supported_qpts = + BIT(IB_QPT_RAW_PACKET); + } + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bc49d14e0a00..656773196f27 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1088,6 +1088,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, tis_num_0, sq->tisn); + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, swp)) + MLX5_SET(sqc, sqc, allow_swp, 1); wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 0b3d30837a9f..64d398e662cd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,22 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_sw_parsing_offloads { + MLX5_IB_SW_PARSING = 1 << 0, + MLX5_IB_SW_PARSING_CSUM = 1 << 1, + MLX5_IB_SW_PARSING_LSO = 1 << 2, +}; + +struct mlx5_ib_sw_parsing_caps { + __u32 sw_parsing_offloads; /* enum mlx5_ib_sw_parsing_offloads */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -177,6 +193,7 @@ struct mlx5_ib_query_device_resp { struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; + struct mlx5_ib_sw_parsing_caps sw_parsing_caps; }; struct mlx5_ib_create_cq { -- cgit v1.2.3 From 795b609c8b59f8f20fa9d72bf8b4ae3b8aa5582c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:34 +0300 Subject: IB/mlx5: Allow posting multi packet send WQEs if hardware supports Set the field to allow posting multi packet send WQEs if hardware supports this feature. This doesn't mean the send WQEs will be for multi packet unless the send WQE was prepared according to multi packet send WQE format. User space shall use flag MLX5_IB_ALLOW_MPW to check if hardware supports MPW and allows MPW in SQ context. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++-- drivers/infiniband/hw/mlx5/qp.c | 2 ++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 5 +++++ 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8f7e46090f9a..ba0a97d6f677 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -802,8 +802,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, uhw->outlen)) { - resp.mlx5_ib_support_multi_pkt_send_wqes = - MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_IB_ALLOW_MPW; resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 656773196f27..d6df88a78d5e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1083,6 +1083,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); + if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) + MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6563500c85de..6865e60ba473 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2445,7 +2445,7 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_4[0x1]; + u8 allow_multi_pkt_send_wqe[0x1]; u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 64d398e662cd..a61a512e5747 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,11 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_mpw_caps { + MPW_RESERVED = 1 << 0, + MLX5_IB_ALLOW_MPW = 1 << 1, +}; + enum mlx5_ib_sw_parsing_offloads { MLX5_IB_SW_PARSING = 1 << 0, MLX5_IB_SW_PARSING_CSUM = 1 << 1, -- cgit v1.2.3 From 050da902adde8faf6b1bef15ac4876ae145358f4 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:35 +0300 Subject: IB/mlx5: Report mlx5 enhanced multi packet WQE capability Expose enhanced multi packet WQE capability to user space through query_device by uhw. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ba0a97d6f677..62e6298810e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -805,6 +805,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; + + if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes |= + MLX5_IB_SUPPORT_EMPW; + resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6865e60ba473..4eff0b8a1482 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -604,7 +604,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 rss_ind_tbl_cap[0x4]; u8 reg_umr_sq[0x1]; u8 scatter_fcs[0x1]; - u8 reserved_at_1a[0x1]; + u8 enhanced_multi_pkt_send_wqe[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a61a512e5747..1791bf123ba9 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -171,6 +171,7 @@ struct mlx5_packet_pacing_caps { enum mlx5_ib_mpw_caps { MPW_RESERVED = 1 << 0, MLX5_IB_ALLOW_MPW = 1 << 1, + MLX5_IB_SUPPORT_EMPW = 1 << 2, }; enum mlx5_ib_sw_parsing_offloads { -- cgit v1.2.3 From 9382d4e1d3c09fe20fa53eb12b51ef01ad40774f Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:06 +0300 Subject: IB/uverbs: Add XRQ creation parameter to UAPI Add tm_list_size parameter to struct ib_uverbs_create_xsrq. If SRQ type is tag-matching this field defines maximum size of tag matching list. Otherwise, it is expected to be zero. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/ib_user_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 63656d2e8705..d5434bbf40c8 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -1024,7 +1024,7 @@ struct ib_uverbs_create_xsrq { __u32 max_wr; __u32 max_sge; __u32 srq_limit; - __u32 reserved; + __u32 max_num_tags; __u32 xrcd_handle; __u32 cq_handle; __u64 driver_data[0]; -- cgit v1.2.3 From 8d50505ada728258fcdce99120b937ce68298c4e Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:08 +0300 Subject: IB/uverbs: Expose XRQ capabilities Make XRQ capabilities available via ibv_query_device() verb. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 10 ++++++++++ include/uapi/rdma/ib_user_verbs.h | 15 +++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e69038a07fa0..e0cb99860934 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3868,6 +3868,16 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.raw_packet_caps = attr.raw_packet_caps; resp.response_length += sizeof(resp.raw_packet_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps)) + goto end; + + resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size; + resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags; + resp.xrq_caps.max_ops = attr.xrq_caps.max_ops; + resp.xrq_caps.max_sge = attr.xrq_caps.max_sge; + resp.xrq_caps.flags = attr.xrq_caps.flags; + resp.response_length += sizeof(resp.xrq_caps); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index d5434bbf40c8..9a0b6479fe0c 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -236,6 +236,20 @@ struct ib_uverbs_rss_caps { __u32 reserved; }; +struct ib_uverbs_tm_caps { + /* Max size of rendezvous request message */ + __u32 max_rndv_hdr_size; + /* Max number of entries in tag matching list */ + __u32 max_num_tags; + /* TM flags */ + __u32 flags; + /* Max number of outstanding list operations */ + __u32 max_ops; + /* Max number of SGE in tag matching entry */ + __u32 max_sge; + __u32 reserved; +}; + struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_query_device_resp base; __u32 comp_mask; @@ -247,6 +261,7 @@ struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_rss_caps rss_caps; __u32 max_wq_type_rq; __u32 raw_packet_caps; + struct ib_uverbs_tm_caps xrq_caps; }; struct ib_uverbs_query_port { -- cgit v1.2.3 From 72f9b089ecd2cc2194d27cbb14fd80a0b1472e89 Mon Sep 17 00:00:00 2001 From: Aditya Sarwade Date: Tue, 29 Aug 2017 15:51:29 -0700 Subject: RDMA/vmw_pvrdma: Report network header type in WC We should report the network header type in the work completion so that the kernel can infer the right RoCE type headers. Reviewed-by: Bryan Tan Signed-off-by: Aditya Sarwade Signed-off-by: Adit Ranadive Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 1 + include/uapi/rdma/vmw_pvrdma-abi.h | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 90aa326fd7c0..8a12dc73b68e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -389,6 +389,7 @@ retry: wc->dlid_path_bits = cqe->dlid_path_bits; wc->port_num = cqe->port_num; wc->vendor_err = cqe->vendor_err; + wc->network_hdr_type = cqe->network_hdr_type; /* Update shared ring state */ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index c8c1d2d6df4d..c6569b0032ec 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -125,7 +125,8 @@ enum pvrdma_wc_flags { PVRDMA_WC_IP_CSUM_OK = 1 << 3, PVRDMA_WC_WITH_SMAC = 1 << 4, PVRDMA_WC_WITH_VLAN = 1 << 5, - PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_VLAN, + PVRDMA_WC_WITH_NETWORK_HDR_TYPE = 1 << 6, + PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, }; struct pvrdma_alloc_ucontext_resp { @@ -283,7 +284,8 @@ struct pvrdma_cqe { __u8 dlid_path_bits; __u8 port_num; __u8 smac[6]; - __u8 reserved2[7]; /* Pad to next power of 2 (64). */ + __u8 network_hdr_type; + __u8 reserved2[6]; /* Pad to next power of 2 (64). */ }; #endif /* __VMW_PVRDMA_ABI_H__ */ -- cgit v1.2.3 From fac9658cabb98afb68ef1630c558864e6f559c07 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:57 +0300 Subject: IB/core: Add new ioctl interface In this ioctl interface, processing the command starts from properties of the command and fetching the appropriate user objects before calling the handler. Parsing and validation is done according to a specifier declared by the driver's code. In the driver, all supported objects are declared. These objects are separated to different object namepsaces. Dividing objects to namespaces is done at initialization by using the higher bits of the object ids. This initialization can mix objects declared in different places to one parsing tree using in this ioctl interface. For each object we list all supported methods. Similarly to objects, methods are separated to method namespaces too. Namespacing is done similarly to the objects case. This could be used in order to add methods to an existing object. Each method has a specific handler, which could be either a default handler or a driver specific handler. Along with the handler, a bunch of attributes are specified as well. Similarly to objects and method, attributes are namespaced and hashed by their ids at initialization too. All supported attributes are subject to automatic fetching and validation. These attributes include the command, response and the method's related objects' ids. When these entities (objects, methods and attributes) are used, the high bits of the entities ids are used in order to calculate the hash bucket index. Then, these high bits are masked out in order to have a zero based index. Since we use these high bits for both bucketing and namespacing, we get a compact representation and O(1) array access. This is mandatory for efficient dispatching. Each attribute has a type (PTR_IN, PTR_OUT, IDR and FD) and a length. Attributes could be validated through some attributes, like: (*) Minimum size / Exact size (*) Fops for FD (*) Object type for IDR If an IDR/fd attribute is specified, the kernel also states the object type and the required access (NEW, WRITE, READ or DESTROY). All uobject/fd management is done automatically by the infrastructure, meaning - the infrastructure will fail concurrent commands that at least one of them requires concurrent access (WRITE/DESTROY), synchronize actions with device removals (dissociate context events) and take care of reference counting (increase/decrease) for concurrent actions invocation. The reference counts on the actual kernel objects shall be handled by the handlers. objects +--------+ | | | | methods +--------+ | | ns method method_spec +-----+ |len | +--------+ +------+[d]+-------+ +----------------+[d]+------------+ |attr1+-> |type | | object +> |method+-> | spec +-> + attr_buckets +-> |default_chain+--> +-----+ |idr_type| +--------+ +------+ |handler| | | +------------+ |attr2| |access | | | | | +-------+ +----------------+ |driver chain| +-----+ +--------+ | | | | +------------+ | | +------+ | | | | | | | | | | | | | | | | | | | | +--------+ [d] = Hash ids to groups using the high order bits The right types table is also chosen by using the high bits from the ids. Currently we have either default or driver specific groups. Once validation and object fetching (or creation) completed, we call the handler: int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *ctx); ctx bundles attributes of different namespaces. Each element there is an array of attributes which corresponds to one namespaces of attributes. For example, in the usually used case: ctx core +----------------------------+ +------------+ | core: +---> | valid | +----------------------------+ | cmd_attr | | driver: | +------------+ |----------------------------+--+ | valid | | | cmd_attr | | +------------+ | | valid | | | obj_attr | | +------------+ | | drivers | +------------+ +> | valid | | cmd_attr | +------------+ | valid | | cmd_attr | +------------+ | valid | | obj_attr | +------------+ Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/rdma_core.c | 46 +++++ drivers/infiniband/core/rdma_core.h | 5 + drivers/infiniband/core/uverbs_ioctl.c | 364 +++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 2 + include/rdma/uverbs_ioctl.h | 101 ++++++++- include/uapi/rdma/rdma_user_ioctl.h | 33 +++ 7 files changed, 543 insertions(+), 10 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_ioctl.c (limited to 'include/uapi') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 920609a0872e..746756dc9877 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -32,4 +32,4 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ - rdma_core.o uverbs_std_types.o + rdma_core.o uverbs_std_types.o uverbs_ioctl.o diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 0fe8ef913387..2a2f002ac7cb 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -36,10 +36,56 @@ #include #include #include +#include #include "uverbs.h" #include "core_priv.h" #include "rdma_core.h" +int uverbs_ns_idx(u16 *id, unsigned int ns_count) +{ + int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT; + + if (ret >= ns_count) + return -EINVAL; + + *id &= ~UVERBS_ID_NS_MASK; + return ret; +} + +const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, + uint16_t object) +{ + const struct uverbs_root_spec *object_hash = ibdev->specs_root; + const struct uverbs_object_spec_hash *objects; + int ret = uverbs_ns_idx(&object, object_hash->num_buckets); + + if (ret < 0) + return NULL; + + objects = object_hash->object_buckets[ret]; + + if (object >= objects->num_objects) + return NULL; + + return objects->objects[object]; +} + +const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, + uint16_t method) +{ + const struct uverbs_method_spec_hash *methods; + int ret = uverbs_ns_idx(&method, object->num_buckets); + + if (ret < 0) + return NULL; + + methods = object->method_buckets[ret]; + if (method >= methods->num_methods) + return NULL; + + return methods->methods[method]; +} + void uverbs_uobject_get(struct ib_uobject *uobject) { kref_get(&uobject->ref); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 9ed6ad0324c7..1efcf93238dd 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -43,6 +43,11 @@ #include #include +int uverbs_ns_idx(u16 *id, unsigned int ns_count); +const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, + uint16_t object); +const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, + uint16_t method); /* * These functions initialize the context and cleanups its uobjects. * The context has a list of objects which is protected by a mutex diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c new file mode 100644 index 000000000000..5286ad57d903 --- /dev/null +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_process_attr(struct ib_device *ibdev, + struct ib_ucontext *ucontext, + const struct ib_uverbs_attr *uattr, + u16 attr_id, + const struct uverbs_attr_spec_hash *attr_spec_bucket, + struct uverbs_attr_bundle_hash *attr_bundle_h, + struct ib_uverbs_attr __user *uattr_ptr) +{ + const struct uverbs_attr_spec *spec; + struct uverbs_attr *e; + const struct uverbs_object_spec *object; + struct uverbs_obj_attr *o_attr; + struct uverbs_attr *elements = attr_bundle_h->attrs; + + if (uattr->reserved) + return -EINVAL; + + if (attr_id >= attr_spec_bucket->num_attrs) { + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EINVAL; + else + return 0; + } + + spec = &attr_spec_bucket->attrs[attr_id]; + e = &elements[attr_id]; + e->uattr = uattr_ptr; + + switch (spec->type) { + case UVERBS_ATTR_TYPE_PTR_IN: + case UVERBS_ATTR_TYPE_PTR_OUT: + if (uattr->len < spec->len || + (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) && + uattr->len > spec->len)) + return -EINVAL; + + e->ptr_attr.data = uattr->data; + e->ptr_attr.len = uattr->len; + e->ptr_attr.flags = uattr->flags; + break; + + case UVERBS_ATTR_TYPE_IDR: + if (uattr->data >> 32) + return -EINVAL; + /* fall through */ + case UVERBS_ATTR_TYPE_FD: + if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) + return -EINVAL; + + o_attr = &e->obj_attr; + object = uverbs_get_object(ibdev, spec->obj.obj_type); + if (!object) + return -EINVAL; + o_attr->type = object->type_attrs; + + o_attr->id = (int)uattr->data; + o_attr->uobject = uverbs_get_uobject_from_context( + o_attr->type, + ucontext, + spec->obj.access, + o_attr->id); + + if (IS_ERR(o_attr->uobject)) + return PTR_ERR(o_attr->uobject); + + if (spec->obj.access == UVERBS_ACCESS_NEW) { + u64 id = o_attr->uobject->id; + + /* Copy the allocated id to the user-space */ + if (put_user(id, &e->uattr->data)) { + uverbs_finalize_object(o_attr->uobject, + UVERBS_ACCESS_NEW, + false); + return -EFAULT; + } + } + + break; + default: + return -EOPNOTSUPP; + } + + set_bit(attr_id, attr_bundle_h->valid_bitmap); + return 0; +} + +static int uverbs_uattrs_process(struct ib_device *ibdev, + struct ib_ucontext *ucontext, + const struct ib_uverbs_attr *uattrs, + size_t num_uattrs, + const struct uverbs_method_spec *method, + struct uverbs_attr_bundle *attr_bundle, + struct ib_uverbs_attr __user *uattr_ptr) +{ + size_t i; + int ret = 0; + int num_given_buckets = 0; + + for (i = 0; i < num_uattrs; i++) { + const struct ib_uverbs_attr *uattr = &uattrs[i]; + u16 attr_id = uattr->attr_id; + struct uverbs_attr_spec_hash *attr_spec_bucket; + + ret = uverbs_ns_idx(&attr_id, method->num_buckets); + if (ret < 0) { + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { + uverbs_finalize_objects(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); + return ret; + } + continue; + } + + /* + * ret is the found ns, so increase num_given_buckets if + * necessary. + */ + if (ret >= num_given_buckets) + num_given_buckets = ret + 1; + + attr_spec_bucket = method->attr_buckets[ret]; + ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, + attr_spec_bucket, &attr_bundle->hash[ret], + uattr_ptr++); + if (ret) { + uverbs_finalize_objects(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); + return ret; + } + } + + return num_given_buckets; +} + +static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, + struct uverbs_attr_bundle *attr_bundle) +{ + unsigned int i; + + for (i = 0; i < attr_bundle->num_buckets; i++) { + struct uverbs_attr_spec_hash *attr_spec_bucket = + method_spec->attr_buckets[i]; + + if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, + attr_bundle->hash[i].valid_bitmap, + attr_spec_bucket->num_attrs)) + return -EINVAL; + } + + return 0; +} + +static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, + const struct ib_uverbs_attr *uattrs, + size_t num_uattrs, + struct ib_device *ibdev, + struct ib_uverbs_file *ufile, + const struct uverbs_method_spec *method_spec, + struct uverbs_attr_bundle *attr_bundle) +{ + int ret; + int finalize_ret; + int num_given_buckets; + + num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, + num_uattrs, method_spec, + attr_bundle, uattr_ptr); + if (num_given_buckets <= 0) + return -EINVAL; + + attr_bundle->num_buckets = num_given_buckets; + ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle); + if (ret) + goto cleanup; + + ret = method_spec->handler(ibdev, ufile, attr_bundle); +cleanup: + finalize_ret = uverbs_finalize_objects(attr_bundle, + method_spec->attr_buckets, + attr_bundle->num_buckets, + !ret); + + return ret ? ret : finalize_ret; +} + +#define UVERBS_OPTIMIZE_USING_STACK_SZ 256 +static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct ib_uverbs_ioctl_hdr *hdr, + void __user *buf) +{ + const struct uverbs_object_spec *object_spec; + const struct uverbs_method_spec *method_spec; + long err = 0; + unsigned int i; + struct { + struct ib_uverbs_attr *uattrs; + struct uverbs_attr_bundle *uverbs_attr_bundle; + } *ctx = NULL; + struct uverbs_attr *curr_attr; + unsigned long *curr_bitmap; + size_t ctx_size; +#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ + uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; +#endif + + if (hdr->reserved) + return -EINVAL; + + object_spec = uverbs_get_object(ib_dev, hdr->object_id); + if (!object_spec) + return -EOPNOTSUPP; + + method_spec = uverbs_get_method(object_spec, hdr->method_id); + if (!method_spec) + return -EOPNOTSUPP; + + if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) + return -EINVAL; + + ctx_size = sizeof(*ctx) + + sizeof(struct uverbs_attr_bundle) + + sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + + sizeof(*ctx->uattrs) * hdr->num_attrs + + sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) * + method_spec->num_child_attrs + + sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) * + (method_spec->num_child_attrs / BITS_PER_LONG + + method_spec->num_buckets); + +#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ + if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ) + ctx = (void *)data; + + if (!ctx) +#endif + ctx = kmalloc(ctx_size, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx); + ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) + + (sizeof(ctx->uverbs_attr_bundle->hash[0]) * + method_spec->num_buckets); + curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); + curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); + + /* + * We just fill the pointers and num_attrs here. The data itself will be + * filled at a later stage (uverbs_process_attr) + */ + for (i = 0; i < method_spec->num_buckets; i++) { + unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; + + ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; + curr_attr += curr_num_attrs; + ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs; + ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap; + bitmap_zero(curr_bitmap, curr_num_attrs); + curr_bitmap += BITS_TO_LONGS(curr_num_attrs); + } + + err = copy_from_user(ctx->uattrs, buf, + sizeof(*ctx->uattrs) * hdr->num_attrs); + if (err) { + err = -EFAULT; + goto out; + } + + err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, + file, method_spec, ctx->uverbs_attr_bundle); +out: +#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ + if (ctx_size > UVERBS_OPTIMIZE_USING_STACK_SZ) +#endif + kfree(ctx); + return err; +} + +#define IB_UVERBS_MAX_CMD_SZ 4096 + +long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_ioctl_hdr __user *user_hdr = + (struct ib_uverbs_ioctl_hdr __user *)arg; + struct ib_uverbs_ioctl_hdr hdr; + struct ib_device *ib_dev; + int srcu_key; + long err; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + err = -EIO; + goto out; + } + + if (cmd == RDMA_VERBS_IOCTL) { + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + + if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || + hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { + err = -EINVAL; + goto out; + } + + if (hdr.reserved) { + err = -EOPNOTSUPP; + goto out; + } + + err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, + (__user void *)arg + sizeof(hdr)); + } else { + err = -ENOIOCTLCMD; + } +out: + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + + return err; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1b4bb8743969..e6df68048517 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2348,6 +2348,8 @@ struct ib_device { void (*get_dev_fw_str)(struct ib_device *, char *str); const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); + + struct uverbs_root_spec *specs_root; }; struct ib_client { diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index d3ec02b7d937..f83f56329761 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -43,6 +43,8 @@ enum uverbs_attr_type { UVERBS_ATTR_TYPE_NA, + UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, }; @@ -54,29 +56,110 @@ enum uverbs_obj_access { UVERBS_ACCESS_DESTROY }; +enum { + UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, + /* Support extending attributes by length */ + UVERBS_ATTR_SPEC_F_MIN_SZ = 1U << 1, +}; + struct uverbs_attr_spec { enum uverbs_attr_type type; - struct { - /* - * higher bits mean the namespace and lower bits mean - * the type id within the namespace. - */ - u16 obj_type; - u8 access; - } obj; + union { + u16 len; + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u8 access; + } obj; + }; + /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ + u8 flags; }; struct uverbs_attr_spec_hash { size_t num_attrs; + unsigned long *mandatory_attrs_bitmask; struct uverbs_attr_spec attrs[0]; }; +struct uverbs_attr_bundle; +struct ib_uverbs_file; + +enum { + /* + * Action marked with this flag creates a context (or root for all + * objects). + */ + UVERBS_ACTION_FLAG_CREATE_ROOT = 1U << 0, +}; + +struct uverbs_method_spec { + /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ + u32 flags; + size_t num_buckets; + size_t num_child_attrs; + int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + struct uverbs_attr_spec_hash *attr_buckets[0]; +}; + +struct uverbs_method_spec_hash { + size_t num_methods; + struct uverbs_method_spec *methods[0]; +}; + +struct uverbs_object_spec { + const struct uverbs_obj_type *type_attrs; + size_t num_buckets; + struct uverbs_method_spec_hash *method_buckets[0]; +}; + +struct uverbs_object_spec_hash { + size_t num_objects; + struct uverbs_object_spec *objects[0]; +}; + +struct uverbs_root_spec { + size_t num_buckets; + struct uverbs_object_spec_hash *object_buckets[0]; +}; + +/* ================================================= + * Parsing infrastructure + * ================================================= + */ + +struct uverbs_ptr_attr { + union { + u64 data; + void __user *ptr; + }; + u16 len; + /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ + u16 flags; +}; + struct uverbs_obj_attr { + /* pointer to the kernel descriptor -> type, access, etc */ + const struct uverbs_obj_type *type; struct ib_uobject *uobject; + /* fd or id in idr of this object */ + int id; }; struct uverbs_attr { - struct uverbs_obj_attr obj_attr; + /* + * pointer to the user-space given attribute, in order to write the + * new uobject's id or update flags. + */ + struct ib_uverbs_attr __user *uattr; + union { + struct uverbs_ptr_attr ptr_attr; + struct uverbs_obj_attr obj_attr; + }; }; struct uverbs_attr_bundle_hash { diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index 9388125ad51b..165a27e969d5 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -43,6 +43,39 @@ /* Legacy name, for user space application which already use it */ #define IB_IOCTL_MAGIC RDMA_IOCTL_MAGIC +#define RDMA_VERBS_IOCTL \ + _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) + +#define UVERBS_ID_NS_MASK 0xF000 +#define UVERBS_ID_NS_SHIFT 12 + +enum { + /* User input */ + UVERBS_ATTR_F_MANDATORY = 1U << 0, + /* + * Valid output bit should be ignored and considered set in + * mandatory fields. This bit is kernel output. + */ + UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1, +}; + +struct ib_uverbs_attr { + __u16 attr_id; /* command specific type attribute */ + __u16 len; /* only for pointers */ + __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ + __u16 reserved; + __u64 data; /* ptr to command, inline data or idr/fd */ +}; + +struct ib_uverbs_ioctl_hdr { + __u16 length; + __u16 object_id; + __u16 method_id; + __u16 num_attrs; + __u64 reserved; + struct ib_uverbs_attr attrs[0]; +}; + /* * General blocks assignments * It is closed on purpose do not expose it it user space -- cgit v1.2.3 From 64b19e1323e96c34af7ca90d1954e70890c7a98e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:03 +0300 Subject: IB/core: Export ioctl enum types to user-space Add a new ib_user_ioctl_verbs.h which exports all required ABI enums and structs to the user-space. Export the default types to user-space through this file. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_std_types.h | 18 +---------- include/uapi/rdma/ib_user_ioctl_verbs.h | 54 +++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 include/uapi/rdma/ib_user_ioctl_verbs.h (limited to 'include/uapi') diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index bef74099b7c5..400efe2a4d3c 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -34,23 +34,7 @@ #define _UVERBS_STD_TYPES__ #include - -enum uverbs_default_objects { - UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ - UVERBS_OBJECT_PD, - UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_OBJECT_CQ, - UVERBS_OBJECT_QP, - UVERBS_OBJECT_SRQ, - UVERBS_OBJECT_AH, - UVERBS_OBJECT_MR, - UVERBS_OBJECT_MW, - UVERBS_OBJECT_FLOW, - UVERBS_OBJECT_XRCD, - UVERBS_OBJECT_RWQ_IND_TBL, - UVERBS_OBJECT_WQ, - UVERBS_OBJECT_LAST, -}; +#include extern const struct uverbs_object_def uverbs_object_comp_channel; extern const struct uverbs_object_def uverbs_object_cq; diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h new file mode 100644 index 000000000000..78a2e5be4d6e --- /dev/null +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_IOCTL_VERBS_H +#define IB_USER_IOCTL_VERBS_H + +enum uverbs_default_objects { + UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ + UVERBS_OBJECT_PD, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_OBJECT_CQ, + UVERBS_OBJECT_QP, + UVERBS_OBJECT_SRQ, + UVERBS_OBJECT_AH, + UVERBS_OBJECT_MR, + UVERBS_OBJECT_MW, + UVERBS_OBJECT_FLOW, + UVERBS_OBJECT_XRCD, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_OBJECT_WQ, + UVERBS_OBJECT_LAST, +}; + +#endif + -- cgit v1.2.3 From d70724f149b107f8e4062320270d3d8b6713a1bb Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:04 +0300 Subject: IB/core: Add legacy driver's user-data In this phase, we don't want to change all the drivers to use flexible driver's specific attributes. Therefore, we add two default attributes: UHW_IN and UHW_OUT. These attributes are optional in some methods and they encode the driver specific command data. We add a function that extract this data and creates the legacy udata over it. Driver's data should start from UVERBS_UDATA_DRIVER_DATA_FLAG. This turns on the first bit of the namespace, indicating this attribute belongs to the driver's namespace. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_std_types.c | 40 ++++++++++++++++++++++++++ include/rdma/uverbs_ioctl.h | 46 ++++++++++++++++++++++++++++++ include/uapi/rdma/ib_user_ioctl_verbs.h | 10 +++++++ 3 files changed, 96 insertions(+) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 5f90978bda8d..db66c18857e4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -209,6 +209,46 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_ return 0; }; +/* + * This spec is used in order to pass information to the hardware driver in a + * legacy way. Every verb that could get driver specific data should get this + * spec. + */ +static const struct uverbs_attr_def uverbs_uhw_compat_in = + UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)); +static const struct uverbs_attr_def uverbs_uhw_compat_out = + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)); + +static void create_udata(struct uverbs_attr_bundle *ctx, + struct ib_udata *udata) +{ + /* + * This is for ease of conversion. The purpose is to convert all drivers + * to use uverbs_attr_bundle instead of ib_udata. + * Assume attr == 0 is input and attr == 1 is output. + */ + void __user *inbuf; + size_t inbuf_len = 0; + void __user *outbuf; + size_t outbuf_len = 0; + const struct uverbs_attr *uhw_in = + uverbs_attr_get(ctx, UVERBS_UHW_IN); + const struct uverbs_attr *uhw_out = + uverbs_attr_get(ctx, UVERBS_UHW_OUT); + + if (!IS_ERR(uhw_in)) { + inbuf = uhw_in->ptr_attr.ptr; + inbuf_len = uhw_in->ptr_attr.len; + } + + if (!IS_ERR(uhw_out)) { + outbuf = uhw_out->ptr_attr.ptr; + outbuf_len = uhw_out->ptr_attr.len; + } + + INIT_UDATA_BUF_OR_NULL(udata, inbuf, outbuf, inbuf_len, outbuf_len); +} + DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, UVERBS_OBJECT_COMP_CHANNEL, &UVERBS_TYPE_ALLOC_FD(0, diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 9a8d217cdc1d..759afa0621ea 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -36,6 +36,7 @@ #include #include #include +#include /* * ======================================= @@ -338,6 +339,51 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b idx & ~UVERBS_ID_NS_MASK); } +static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT; + + if (!uverbs_attr_is_valid(attrs_bundle, idx)) + return ERR_PTR(-ENOENT); + + return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK]; +} + +static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, const void *from) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + u16 flags; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; + return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) && + !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT; +} + +static inline int _uverbs_copy_from(void *to, size_t to_size, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data)) + memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); + else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len)) + return -EFAULT; + + return 0; +} + +#define uverbs_copy_from(to, attrs_bundle, idx) \ + _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx) + /* ================================================= * Definitions -> Specs infrastructure * ================================================= diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 78a2e5be4d6e..90f81eeca35b 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -33,6 +33,11 @@ #ifndef IB_USER_IOCTL_VERBS_H #define IB_USER_IOCTL_VERBS_H +#include + +#define UVERBS_UDATA_DRIVER_DATA_NS 1 +#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT) + enum uverbs_default_objects { UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ UVERBS_OBJECT_PD, @@ -50,5 +55,10 @@ enum uverbs_default_objects { UVERBS_OBJECT_LAST, }; +enum { + UVERBS_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG, + UVERBS_UHW_OUT, +}; + #endif -- cgit v1.2.3 From 9ee79fce364216df35ec46e26d20780c3c1644cc Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:05 +0300 Subject: IB/core: Add completion queue (cq) object actions Adding CQ ioctl actions: 1. create_cq 2. destroy_cq This requires adding the following: 1. A specification describing the method a. Handler b. Attributes specification Each attribute is one of the following: a. PTR_IN - input data Note: This could be encoded inlined for data < 64bit b. PTR_OUT - response data c. IDR - idr based object d. FD - fd based object Blobs attributes (clauses a and b) contain their type, while objects specifications (clauses c and d) contains the expected object type (for example, the given id should be UVERBS_TYPE_PD) and the required access (READ, WRITE, NEW or DESTROY). If a NEW is required, the new object's id will be assigned to this attribute. All attributes could get UA_FLAGS attribute. Currently we support stating that an attribute is mandatory or that the specification size corresponds to a lower bound (and that this attribute could be extended). We currently add both default attributes and the two generic UHW_IN and UHW_OUT driver specific attributes. 2. Handler A handler gets a uverbs_attr_bundle. The handler developer uses uverbs_attr_get to fetch an attribute of a given id. Each of these attribute groups correspond to the specification group defined in the action (clauses 1.b and 1.c respectively). The indices of these arrays corresponds to the attribute ids declared in the specifications (clause 2). The handler is quite simple. It assumes the infrastructure fetched all objects and locked, created or destroyed them as required by the specification. Pointer (or blob) attributes were validated to match their required sizes. After the handler finished, the infrastructure commits or rollbacks the objects. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_std_types.c | 138 ++++++++++++++++++++++++++++- include/uapi/rdma/ib_user_ioctl_verbs.h | 20 +++++ 2 files changed, 157 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index db66c18857e4..0a98579700ec 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -249,6 +249,140 @@ static void create_udata(struct uverbs_attr_bundle *ctx, INIT_UDATA_BUF_OR_NULL(udata, inbuf, outbuf, inbuf_len, outbuf_len); } +static int uverbs_create_cq_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_ucontext *ucontext = file->ucontext; + struct ib_ucq_object *obj; + struct ib_udata uhw; + int ret; + u64 user_handle; + struct ib_cq_init_attr attr = {}; + struct ib_cq *cq; + struct ib_uverbs_completion_event_file *ev_file = NULL; + const struct uverbs_attr *ev_file_attr; + struct ib_uobject *ev_file_uobj; + + if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR); + if (!ret) + ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE); + if (!ret) + ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE); + if (ret) + return ret; + + /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ + if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT) + return -EFAULT; + + ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL); + if (!IS_ERR(ev_file_attr)) { + ev_file_uobj = ev_file_attr->obj_attr.uobject; + + ev_file = container_of(ev_file_uobj, + struct ib_uverbs_completion_event_file, + uobj_file.uobj); + uverbs_uobject_get(ev_file_uobj); + } + + if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { + ret = -EINVAL; + goto err_event_file; + } + + obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject, + typeof(*obj), uobject); + obj->uverbs_file = ucontext->ufile; + obj->comp_events_reported = 0; + obj->async_events_reported = 0; + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->async_list); + + /* Temporary, only until drivers get the new uverbs_attr_bundle */ + create_udata(attrs, &uhw); + + cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err_event_file; + } + + cq->device = ib_dev; + cq->uobject = &obj->uobject; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = &ev_file->ev_queue; + obj->uobject.object = cq; + obj->uobject.user_handle = user_handle; + atomic_set(&cq->usecnt, 0); + + ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe); + if (ret) + goto err_cq; + + return 0; +err_cq: + ib_destroy_cq(cq); + +err_event_file: + if (ev_file) + uverbs_uobject_put(ev_file_uobj); + return ret; +}; + +static DECLARE_UVERBS_METHOD( + uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler, + &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32), + &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); + +static int uverbs_destroy_cq_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_destroy_cq_resp resp; + struct ib_uobject *uobj = + uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject; + struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object, + uobject); + int ret; + + if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) + return -EOPNOTSUPP; + + ret = rdma_explicit_destroy(uobj); + if (ret) + return ret; + + resp.comp_events_reported = obj->comp_events_reported; + resp.async_events_reported = obj->async_events_reported; + + return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp); +} + +static DECLARE_UVERBS_METHOD( + uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler, + &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, UVERBS_OBJECT_COMP_CHANNEL, &UVERBS_TYPE_ALLOC_FD(0, @@ -259,7 +393,9 @@ DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ, &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, - uverbs_free_cq)); + uverbs_free_cq), + &uverbs_method_cq_create, + &uverbs_method_cq_destroy); DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP, &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 90f81eeca35b..842792eae383 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -60,5 +60,25 @@ enum { UVERBS_UHW_OUT, }; +enum uverbs_create_cq_cmd_attr_ids { + CREATE_CQ_HANDLE, + CREATE_CQ_CQE, + CREATE_CQ_USER_HANDLE, + CREATE_CQ_COMP_CHANNEL, + CREATE_CQ_COMP_VECTOR, + CREATE_CQ_FLAGS, + CREATE_CQ_RESP_CQE, +}; + +enum uverbs_destroy_cq_cmd_attr_ids { + DESTROY_CQ_HANDLE, + DESTROY_CQ_RESP, +}; + +enum uverbs_actions_cq_ops { + UVERBS_CQ_CREATE, + UVERBS_CQ_DESTROY, +}; + #endif -- cgit v1.2.3