Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 513
1 files changed, 442 insertions, 71 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b4d8ff8ab807..e52dd21519b4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -982,13 +982,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	}
 
 	if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
-		resp.cqe_comp_caps.max_num =
-			MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
-			MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
-		resp.cqe_comp_caps.supported_format =
-			MLX5_IB_CQE_RES_FORMAT_HASH |
-			MLX5_IB_CQE_RES_FORMAT_CSUM;
 		resp.response_length += sizeof(resp.cqe_comp_caps);
+
+		if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
+			resp.cqe_comp_caps.max_num =
+				MLX5_CAP_GEN(dev->mdev,
+					     cqe_compression_max_num);
+
+			resp.cqe_comp_caps.supported_format =
+				MLX5_IB_CQE_RES_FORMAT_HASH |
+				MLX5_IB_CQE_RES_FORMAT_CSUM;
+
+			if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
+				resp.cqe_comp_caps.supported_format |=
+					MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX;
+		}
 	}
 
 	if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
@@ -1084,6 +1092,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
 			resp.tunnel_offloads_caps |=
 				MLX5_IB_TUNNELED_OFFLOADS_GRE;
+		if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+		    MLX5_FLEX_PROTO_CW_MPLS_GRE)
+			resp.tunnel_offloads_caps |=
+				MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
+		if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+		    MLX5_FLEX_PROTO_CW_MPLS_UDP)
+			resp.tunnel_offloads_caps |=
+				MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
 	}
 
 	if (uhw->outlen) {
@@ -1953,49 +1969,15 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
 
 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
-	int ret;
 	struct vm_area_struct *vma;
 	struct mlx5_ib_vma_private_data *vma_private, *n;
 	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-	struct task_struct *owning_process  = NULL;
-	struct mm_struct   *owning_mm       = NULL;
 
-	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
-	if (!owning_process)
-		return;
-
-	owning_mm = get_task_mm(owning_process);
-	if (!owning_mm) {
-		pr_info("no mm, disassociate ucontext is pending task termination\n");
-		while (1) {
-			put_task_struct(owning_process);
-			usleep_range(1000, 2000);
-			owning_process = get_pid_task(ibcontext->tgid,
-						      PIDTYPE_PID);
-			if (!owning_process ||
-			    owning_process->state == TASK_DEAD) {
-				pr_info("disassociate ucontext done, task was terminated\n");
-				/* in case task was dead need to release the
-				 * task struct.
-				 */
-				if (owning_process)
-					put_task_struct(owning_process);
-				return;
-			}
-		}
-	}
-
-	/* need to protect from a race on closing the vma as part of
-	 * mlx5_ib_vma_close.
-	 */
-	down_write(&owning_mm->mmap_sem);
 	mutex_lock(&context->vma_private_list_mutex);
 	list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
 				 list) {
 		vma = vma_private->vma;
-		ret = zap_vma_ptes(vma, vma->vm_start,
-				   PAGE_SIZE);
-		WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+		zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
 		/* context going to be destroyed, should
 		 * not access ops any more.
 		 */
@@ -2005,9 +1987,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 		kfree(vma_private);
 	}
 	mutex_unlock(&context->vma_private_list_mutex);
-	up_write(&owning_mm->mmap_sem);
-	mmput(owning_mm);
-	put_task_struct(owning_process);
 }
 
 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2051,10 +2030,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
 	if (err)
 		return err;
 
-	mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n",
-		    vma->vm_start,
-		    (unsigned long long)pfn << PAGE_SHIFT);
-
 	return mlx5_ib_set_vma_data(vma, context);
 }
 
@@ -2149,15 +2124,14 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	err = io_remap_pfn_range(vma, vma->vm_start, pfn,
 				 PAGE_SIZE, vma->vm_page_prot);
 	if (err) {
-		mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
-			    err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
+		mlx5_ib_err(dev,
+			    "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+			    err, mmap_cmd2str(cmd));
 		err = -EAGAIN;
 		goto err;
 	}
 
 	pa = pfn << PAGE_SHIFT;
-	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
-		    vma->vm_start, &pa);
 
 	err = mlx5_ib_set_vma_data(vma, context);
 	if (err)
@@ -2243,10 +2217,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 		if (io_remap_pfn_range(vma, vma->vm_start, pfn,
 				       PAGE_SIZE, vma->vm_page_prot))
 			return -EAGAIN;
-
-		mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
-			    vma->vm_start,
-			    (unsigned long long)pfn << PAGE_SHIFT);
 		break;
 	case MLX5_IB_MMAP_CLOCK_INFO:
 		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
@@ -2386,7 +2356,8 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
 enum {
 	MATCH_CRITERIA_ENABLE_OUTER_BIT,
 	MATCH_CRITERIA_ENABLE_MISC_BIT,
-	MATCH_CRITERIA_ENABLE_INNER_BIT
+	MATCH_CRITERIA_ENABLE_INNER_BIT,
+	MATCH_CRITERIA_ENABLE_MISC2_BIT
 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)			           \
@@ -2406,6 +2377,9 @@ static u8 get_match_criteria_enable(u32 *match_criteria)
 	match_criteria_enable |=
 		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
 		MATCH_CRITERIA_ENABLE_INNER_BIT;
+	match_criteria_enable |=
+		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+		MATCH_CRITERIA_ENABLE_MISC2_BIT;
 
 	return match_criteria_enable;
 }
@@ -2416,7 +2390,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 }
 
-static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
 			   bool inner)
 {
 	if (inner) {
@@ -2440,6 +2414,27 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
 }
 
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+		return -EOPNOTSUPP;
+
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+		return -EOPNOTSUPP;
+
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+		return -EOPNOTSUPP;
+
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 #define LAST_ETH_FIELD vlan_tag
 #define LAST_IB_FIELD sl
 #define LAST_IPV4_FIELD tos
@@ -2448,6 +2443,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 #define LAST_TUNNEL_FIELD tunnel_id
 #define LAST_FLOW_TAG_FIELD tag_id
 #define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
 
 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -2479,12 +2475,16 @@ static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
 static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			   u32 *match_v, const union ib_flow_spec *ib_spec,
 			   const struct ib_flow_attr *flow_attr,
-			   struct mlx5_flow_act *action)
+			   struct mlx5_flow_act *action, u32 prev_type)
 {
 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 					   misc_parameters);
 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
 					   misc_parameters);
+	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					    misc_parameters_2);
+	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					    misc_parameters_2);
 	void *headers_c;
 	void *headers_v;
 	int match_ipv;
@@ -2689,6 +2689,93 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 			 ntohs(ib_spec->tcp_udp.val.dst_port));
 		break;
+	case IB_FLOW_SPEC_GRE:
+		if (ib_spec->gre.mask.c_ks_res0_ver)
+			return -EOPNOTSUPP;
+
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+			 0xff);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+			 IPPROTO_GRE);
+
+		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+			 0xffff);
+		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+			 ntohs(ib_spec->gre.val.protocol));
+
+		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+				    gre_key_h),
+		       &ib_spec->gre.mask.key,
+		       sizeof(ib_spec->gre.mask.key));
+		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+				    gre_key_h),
+		       &ib_spec->gre.val.key,
+		       sizeof(ib_spec->gre.val.key));
+		break;
+	case IB_FLOW_SPEC_MPLS:
+		switch (prev_type) {
+		case IB_FLOW_SPEC_UDP:
+			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+						   ft_field_support.outer_first_mpls_over_udp),
+						   &ib_spec->mpls.mask.tag))
+				return -EOPNOTSUPP;
+
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+					    outer_first_mpls_over_udp),
+			       &ib_spec->mpls.val.tag,
+			       sizeof(ib_spec->mpls.val.tag));
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+					    outer_first_mpls_over_udp),
+			       &ib_spec->mpls.mask.tag,
+			       sizeof(ib_spec->mpls.mask.tag));
+			break;
+		case IB_FLOW_SPEC_GRE:
+			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+						   ft_field_support.outer_first_mpls_over_gre),
+						   &ib_spec->mpls.mask.tag))
+				return -EOPNOTSUPP;
+
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+					    outer_first_mpls_over_gre),
+			       &ib_spec->mpls.val.tag,
+			       sizeof(ib_spec->mpls.val.tag));
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+					    outer_first_mpls_over_gre),
+			       &ib_spec->mpls.mask.tag,
+			       sizeof(ib_spec->mpls.mask.tag));
+			break;
+		default:
+			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+							   ft_field_support.inner_first_mpls),
+							   &ib_spec->mpls.mask.tag))
+					return -EOPNOTSUPP;
+
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+						    inner_first_mpls),
+				       &ib_spec->mpls.val.tag,
+				       sizeof(ib_spec->mpls.val.tag));
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+						    inner_first_mpls),
+				       &ib_spec->mpls.mask.tag,
+				       sizeof(ib_spec->mpls.mask.tag));
+			} else {
+				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+							   ft_field_support.outer_first_mpls),
+							   &ib_spec->mpls.mask.tag))
+					return -EOPNOTSUPP;
+
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+						    outer_first_mpls),
+				       &ib_spec->mpls.val.tag,
+				       sizeof(ib_spec->mpls.val.tag));
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+						    outer_first_mpls),
+				       &ib_spec->mpls.mask.tag,
+				       sizeof(ib_spec->mpls.mask.tag));
+			}
+		}
+		break;
 	case IB_FLOW_SPEC_VXLAN_TUNNEL:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
 					 LAST_TUNNEL_FIELD))
@@ -2720,6 +2807,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 		if (ret)
 			return ret;
 		break;
+	case IB_FLOW_SPEC_ACTION_COUNT:
+		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+					 LAST_COUNTERS_FIELD))
+			return -EOPNOTSUPP;
+
+		/* for now support only one counters spec per flow */
+		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+			return -EINVAL;
+
+		action->counters = ib_spec->flow_count.counters;
+		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2867,6 +2966,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev,
 	}
 }
 
+static void counters_clear_description(struct ib_counters *counters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	kfree(mcounters->counters_data);
+	mcounters->counters_data = NULL;
+	mcounters->cntrs_max_index = 0;
+	mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 {
 	struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
@@ -2886,8 +2996,11 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 
 	mlx5_del_flow_rules(handler->rule);
 	put_flow_table(dev, handler->prio, true);
-	mutex_unlock(&dev->flow_db->lock);
+	if (handler->ibcounters &&
+	    atomic_read(&handler->ibcounters->usecnt) == 1)
+		counters_clear_description(handler->ibcounters);
 
+	mutex_unlock(&dev->flow_db->lock);
 	kfree(handler);
 
 	return 0;
@@ -3007,21 +3120,143 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev,
 	}
 }
 
+static int read_flow_counters(struct ib_device *ibdev,
+			      struct mlx5_read_counters_attr *read_attr)
+{
+	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+	return mlx5_fc_query(dev->mdev, fc,
+			     &read_attr->out[IB_COUNTER_PACKETS],
+			     &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(struct ib_counters *counters,
+				    enum mlx5_ib_counters_type counters_type,
+				    struct mlx5_ib_flow_counters_desc *desc_data,
+				    u32 ncounters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+	u32 cntrs_max_index = 0;
+	int i;
+
+	if (counters_type != MLX5_IB_COUNTERS_FLOW)
+		return -EINVAL;
+
+	/* init the fields for the object */
+	mcounters->type = counters_type;
+	mcounters->read_counters = read_flow_counters;
+	mcounters->counters_num = FLOW_COUNTERS_NUM;
+	mcounters->ncounters = ncounters;
+	/* each counter entry have both description and index pair */
+	for (i = 0; i < ncounters; i++) {
+		if (desc_data[i].description > IB_COUNTER_BYTES)
+			return -EINVAL;
+
+		if (cntrs_max_index <= desc_data[i].index)
+			cntrs_max_index = desc_data[i].index + 1;
+	}
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	mcounters->counters_data = desc_data;
+	mcounters->cntrs_max_index = cntrs_max_index;
+	mutex_unlock(&mcounters->mcntrs_mutex);
+
+	return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+static int flow_counters_set_data(struct ib_counters *ibcounters,
+				  struct mlx5_ib_create_flow *ucmd)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+	bool hw_hndl = false;
+	int ret = 0;
+
+	if (ucmd && ucmd->ncounters_data != 0) {
+		cntrs_data = ucmd->data;
+		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+			return -EINVAL;
+
+		desc_data = kcalloc(cntrs_data->ncounters,
+				    sizeof(*desc_data),
+				    GFP_KERNEL);
+		if (!desc_data)
+			return -ENOMEM;
+
+		if (copy_from_user(desc_data,
+				   u64_to_user_ptr(cntrs_data->counters_data),
+				   sizeof(*desc_data) * cntrs_data->ncounters)) {
+			ret = -EFAULT;
+			goto free;
+		}
+	}
+
+	if (!mcounters->hw_cntrs_hndl) {
+		mcounters->hw_cntrs_hndl = mlx5_fc_create(
+			to_mdev(ibcounters->device)->mdev, false);
+		if (!mcounters->hw_cntrs_hndl) {
+			ret = -ENOMEM;
+			goto free;
+		}
+		hw_hndl = true;
+	}
+
+	if (desc_data) {
+		/* counters already bound to at least one flow */
+		if (mcounters->cntrs_max_index) {
+			ret = -EINVAL;
+			goto free_hndl;
+		}
+
+		ret = counters_set_description(ibcounters,
+					       MLX5_IB_COUNTERS_FLOW,
+					       desc_data,
+					       cntrs_data->ncounters);
+		if (ret)
+			goto free_hndl;
+
+	} else if (!mcounters->cntrs_max_index) {
+		/* counters not bound yet, must have udata passed */
+		ret = -EINVAL;
+		goto free_hndl;
+	}
+
+	return 0;
+
+free_hndl:
+	if (hw_hndl) {
+		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+				mcounters->hw_cntrs_hndl);
+		mcounters->hw_cntrs_hndl = NULL;
+	}
+free:
+	kfree(desc_data);
+	return ret;
+}
+
 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 						      struct mlx5_ib_flow_prio *ft_prio,
 						      const struct ib_flow_attr *flow_attr,
 						      struct mlx5_flow_destination *dst,
-						      u32 underlay_qpn)
+						      u32 underlay_qpn,
+						      struct mlx5_ib_create_flow *ucmd)
 {
 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
 	struct mlx5_ib_flow_handler *handler;
 	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
 	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_destination *rule_dst = dst;
+	struct mlx5_flow_destination dest_arr[2] = {};
+	struct mlx5_flow_destination *rule_dst = dest_arr;
 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
 	unsigned int spec_index;
+	u32 prev_type = 0;
 	int err = 0;
-	int dest_num = 1;
+	int dest_num = 0;
 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
 
 	if (!is_valid_attr(dev->mdev, flow_attr))
@@ -3035,14 +3270,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	}
 
 	INIT_LIST_HEAD(&handler->list);
+	if (dst) {
+		memcpy(&dest_arr[0], dst, sizeof(*dst));
+		dest_num++;
+	}
 
 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 		err = parse_flow_attr(dev->mdev, spec->match_criteria,
 				      spec->match_value,
-				      ib_flow, flow_attr, &flow_act);
+				      ib_flow, flow_attr, &flow_act,
+				      prev_type);
 		if (err < 0)
 			goto free;
 
+		prev_type = ((union ib_flow_spec *)ib_flow)->type;
 		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
 	}
 
@@ -3069,15 +3310,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 		goto free;
 	}
 
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		err = flow_counters_set_data(flow_act.counters, ucmd);
+		if (err)
+			goto free;
+
+		handler->ibcounters = flow_act.counters;
+		dest_arr[dest_num].type =
+			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest_arr[dest_num].counter =
+			to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+		dest_num++;
+	}
+
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
-		rule_dst = NULL;
-		dest_num = 0;
+		if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+			rule_dst = NULL;
+			dest_num = 0;
+		}
 	} else {
 		if (is_egress)
 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 		else
 			flow_act.action |=
-				dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+				dest_num ?  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 					MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 	}
 
@@ -3103,8 +3359,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	ft_prio->flow_table = ft;
 free:
-	if (err)
+	if (err && handler) {
+		if (handler->ibcounters &&
+		    atomic_read(&handler->ibcounters->usecnt) == 1)
+			counters_clear_description(handler->ibcounters);
 		kfree(handler);
+	}
 	kvfree(spec);
 	return err ? ERR_PTR(err) : handler;
 }
@@ -3114,7 +3374,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 						     const struct ib_flow_attr *flow_attr,
 						     struct mlx5_flow_destination *dst)
 {
-	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
 }
 
 static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
@@ -3244,7 +3504,8 @@ err:
 
 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 					   struct ib_flow_attr *flow_attr,
-					   int domain)
+					   int domain,
+					   struct ib_udata *udata)
 {
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_ib_qp *mqp = to_mqp(qp);
@@ -3253,9 +3514,44 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
 	struct mlx5_ib_flow_prio *ft_prio;
 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+	size_t min_ucmd_sz, required_ucmd_sz;
 	int err;
 	int underlay_qpn;
 
+	if (udata && udata->inlen) {
+		min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+				sizeof(ucmd_hdr.reserved);
+		if (udata->inlen < min_ucmd_sz)
+			return ERR_PTR(-EOPNOTSUPP);
+
+		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+		if (err)
+			return ERR_PTR(err);
+
+		/* currently supports only one counters data */
+		if (ucmd_hdr.ncounters_data > 1)
+			return ERR_PTR(-EINVAL);
+
+		required_ucmd_sz = min_ucmd_sz +
+			sizeof(struct mlx5_ib_flow_counters_data) *
+			ucmd_hdr.ncounters_data;
+		if (udata->inlen > required_ucmd_sz &&
+		    !ib_is_udata_cleared(udata, required_ucmd_sz,
+					 udata->inlen - required_ucmd_sz))
+			return ERR_PTR(-EOPNOTSUPP);
+
+		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+		if (!ucmd)
+			return ERR_PTR(-ENOMEM);
+
+		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+		if (err) {
+			kfree(ucmd);
+			return ERR_PTR(err);
+		}
+	}
+
 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
 		return ERR_PTR(-ENOMEM);
 
@@ -3309,7 +3605,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 			underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
 					mqp->underlay_qpn : 0;
 			handler = _create_flow_rule(dev, ft_prio, flow_attr,
-						    dst, underlay_qpn);
+						    dst, underlay_qpn, ucmd);
 		}
 	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
@@ -3330,6 +3626,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 
 	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
+	kfree(ucmd);
 
 	return &handler->ibflow;
 
@@ -3340,6 +3637,7 @@ destroy_ft:
 unlock:
 	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
+	kfree(ucmd);
 	kfree(handler);
 	return ERR_PTR(err);
 }
@@ -5000,6 +5298,76 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev)
 	uverbs_free_spec_tree(dev->ib_dev.specs_root);
 }
 
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+				 struct ib_counters_read_attr *read_attr,
+				 struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+	struct mlx5_read_counters_attr mread_attr = {};
+	struct mlx5_ib_flow_counters_desc *desc;
+	int ret, i;
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	if (mcounters->cntrs_max_index > read_attr->ncounters) {
+		ret = -EINVAL;
+		goto err_bound;
+	}
+
+	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+				 GFP_KERNEL);
+	if (!mread_attr.out) {
+		ret = -ENOMEM;
+		goto err_bound;
+	}
+
+	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+	mread_attr.flags = read_attr->flags;
+	ret = mcounters->read_counters(counters->device, &mread_attr);
+	if (ret)
+		goto err_read;
+
+	/* do the pass over the counters data array to assign according to the
+	 * descriptions and indexing pairs
+	 */
+	desc = mcounters->counters_data;
+	for (i = 0; i < mcounters->ncounters; i++)
+		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+	kfree(mread_attr.out);
+err_bound:
+	mutex_unlock(&mcounters->mcntrs_mutex);
+	return ret;
+}
+
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+	counters_clear_description(counters);
+	if (mcounters->hw_cntrs_hndl)
+		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+				mcounters->hw_cntrs_hndl);
+
+	kfree(mcounters);
+
+	return 0;
+}
+
+static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
+						   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_mcounters *mcounters;
+
+	mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
+	if (!mcounters)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&mcounters->mcntrs_mutex);
+
+	return &mcounters->ibcntrs;
+}
+
 void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
@@ -5243,6 +5611,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
 	dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
 	dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
+	dev->ib_dev.create_counters = mlx5_ib_create_counters;
+	dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
+	dev->ib_dev.read_counters = mlx5_ib_read_counters;
 
 	err = init_node_data(dev);
 	if (err)
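Usage note: the create_counters/destroy_counters/read_counters verbs and the IB_FLOW_SPEC_ACTION_COUNT handling added above are exercised from userspace through rdma-core's flow-counters API (ibv_create_counters(3), ibv_attach_counters_point_flow(3), ibv_read_counters(3)). The following is a minimal sketch, not code from this patch, of how an application might bind a packets/bytes counter pair to a flow and read it back; it assumes a contemporary rdma-core that carries the counters verbs, an already-created context and RAW_PACKET QP, port 1, and a catch-all ETH spec, with error unwinding abbreviated.

#include <infiniband/verbs.h>
#include <stdint.h>

/* Flow attribute followed inline by its specs, as ibv_create_flow() expects. */
struct raw_eth_flow_with_count {
	struct ibv_flow_attr                attr;
	struct ibv_flow_spec_eth            eth;
	struct ibv_flow_spec_counter_action count;
};

/* Attach packets/bytes counters to a new flow on "qp" and read them back.
 * Returns 0 on success, -1 on failure (cleanup on error paths omitted).
 */
static int count_eth_flow(struct ibv_context *ctx, struct ibv_qp *qp,
			  uint64_t vals[2])
{
	struct ibv_counters_init_attr init_attr = {};
	struct ibv_counter_attach_attr attach = {};
	struct ibv_counters *counters;
	struct ibv_flow *flow;

	counters = ibv_create_counters(ctx, &init_attr);
	if (!counters)
		return -1;

	/* Describe the layout ibv_read_counters() will return: slot 0 gets
	 * the packet count, slot 1 the byte count.  flow == NULL means the
	 * binding happens later, via the counter_action spec below; on the
	 * kernel side these (description, index) pairs are the desc_data
	 * consumed by counters_set_description() in this patch.
	 */
	attach.counter_desc = IBV_COUNTER_PACKETS;
	attach.index = 0;
	if (ibv_attach_counters_point_flow(counters, &attach, NULL))
		return -1;
	attach.counter_desc = IBV_COUNTER_BYTES;
	attach.index = 1;
	if (ibv_attach_counters_point_flow(counters, &attach, NULL))
		return -1;

	struct raw_eth_flow_with_count fa = {
		.attr = {
			.type         = IBV_FLOW_ATTR_NORMAL,
			.size         = sizeof(fa),
			.num_of_specs = 2,
			.port         = 1,
		},
		.eth = {        /* zeroed mask/val: match all ethernet */
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(fa.eth),
		},
		.count = {      /* becomes IB_FLOW_SPEC_ACTION_COUNT */
			.type     = IBV_FLOW_SPEC_ACTION_COUNT,
			.size     = sizeof(fa.count),
			.counters = counters,
		},
	};

	flow = ibv_create_flow(qp, &fa.attr);
	if (!flow)
		return -1;

	/* ... let traffic run, then collect a reading; this call lands in
	 * mlx5_ib_read_counters() above, which queries the mlx5_fc handle
	 * and scatters packets/bytes into vals[] by the attached indices.
	 */
	if (ibv_read_counters(counters, vals, 2, 0))
		return -1;

	ibv_destroy_flow(flow);
	return ibv_destroy_counters(counters);
}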