diff options
Diffstat (limited to 'drivers/infiniband')
31 files changed, 392 insertions, 238 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index b7c078b7f7cf..a1291f475466 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port, out_unlock: mutex_unlock(&table->lock); if (ret) - pr_warn("%s: unable to add gid %pI6 error=%d\n", - __func__, gid->raw, ret); + pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n", + __func__, gid->raw, ret); return ret; } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 07fb8d3c037f..d45e3909dafe 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -166,7 +166,7 @@ struct cm_port { struct cm_device { struct kref kref; struct list_head list; - spinlock_t mad_agent_lock; + rwlock_t mad_agent_lock; struct ib_device *ib_device; u8 ack_delay; int going_down; @@ -284,7 +284,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) if (!cm_id_priv->av.port) return ERR_PTR(-EINVAL); - spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); mad_agent = cm_id_priv->av.port->mad_agent; if (!mad_agent) { m = ERR_PTR(-EINVAL); @@ -315,7 +315,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) m->context[0] = cm_id_priv; out: - spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return m; } @@ -1294,10 +1294,10 @@ static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) if (!cm_id_priv->av.port) return cpu_to_be64(low_tid); - spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); if (cm_id_priv->av.port->mad_agent) hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32; - spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return cpu_to_be64(hi_tid | low_tid); } @@ -4374,7 +4374,7 @@ static int cm_add_one(struct ib_device *ib_device) return -ENOMEM; kref_init(&cm_dev->kref); - spin_lock_init(&cm_dev->mad_agent_lock); + rwlock_init(&cm_dev->mad_agent_lock); cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; @@ -4490,9 +4490,9 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) * The above ensures no call paths from the work are running, * the remaining paths all take the mad_agent_lock. */ - spin_lock(&cm_dev->mad_agent_lock); + write_lock(&cm_dev->mad_agent_lock); port->mad_agent = NULL; - spin_unlock(&cm_dev->mad_agent_lock); + write_unlock(&cm_dev->mad_agent_lock); ib_unregister_mad_agent(mad_agent); ib_port_unregister_client_groups(ib_device, i, cm_counter_groups); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 176d0b3e4488..81bc24a346d3 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -5231,7 +5231,8 @@ static int cma_netevent_callback(struct notifier_block *self, neigh->ha, ETH_ALEN)) continue; cma_id_get(current_id); - queue_work(cma_wq, ¤t_id->id.net_work); + if (!queue_work(cma_wq, ¤t_id->id.net_work)) + cma_id_put(current_id); } out: spin_unlock_irqrestore(&id_table_lock, flags); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 46102f179955..df2aa15a5bc9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1368,6 +1368,9 @@ static void ib_device_notify_register(struct ib_device *device) down_read(&devices_rwsem); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT); if (ret) goto out; @@ -1484,10 +1487,9 @@ int ib_register_device(struct ib_device *device, const char *name, return ret; } dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_notify_register(device); + ib_device_put(device); return 0; diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 7e3a55349e10..96a678250e55 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -366,12 +366,9 @@ EXPORT_SYMBOL(iw_cm_disconnect); /* * CM_ID <-- DESTROYING * - * Clean up all resources associated with the connection and release - * the initial reference taken by iw_create_cm_id. - * - * Returns true if and only if the last cm_id_priv reference has been dropped. + * Clean up all resources associated with the connection. */ -static bool destroy_cm_id(struct iw_cm_id *cm_id) +static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; @@ -440,20 +437,22 @@ static bool destroy_cm_id(struct iw_cm_id *cm_id) iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); } - - return iwcm_deref_id(cm_id_priv); } /* - * This function is only called by the application thread and cannot - * be called by the event thread. The function will wait for all - * references to be released on the cm_id and then kfree the cm_id - * object. + * Destroy cm_id. If the cm_id still has other references, wait for all + * references to be released on the cm_id and then release the initial + * reference taken by iw_create_cm_id. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { - if (!destroy_cm_id(cm_id)) + struct iwcm_id_private *cm_id_priv; + + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + destroy_cm_id(cm_id); + if (refcount_read(&cm_id_priv->refcount) > 1) flush_workqueue(iwcm_wq); + iwcm_deref_id(cm_id_priv); } EXPORT_SYMBOL(iw_destroy_cm_id); @@ -1033,8 +1032,10 @@ static void cm_work_handler(struct work_struct *_work) if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { ret = process_event(cm_id_priv, &levent); - if (ret) - WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); + if (ret) { + destroy_cm_id(&cm_id_priv->id); + WARN_ON_ONCE(iwcm_deref_id(cm_id_priv)); + } } else pr_debug("dropping event %d\n", levent.event); if (iwcm_deref_id(cm_id_priv)) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f12189986303..fef11a80647c 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1468,10 +1468,11 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { }; -static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack, - enum rdma_restrack_type res_type, - res_fill_func_t fill_func) +static noinline_for_stack int +res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + enum rdma_restrack_type res_type, + res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; @@ -2256,10 +2257,10 @@ err: return ret; } -static int stat_get_doit_default_counter(struct sk_buff *skb, - struct nlmsghdr *nlh, - struct netlink_ext_ack *extack, - struct nlattr *tb[]) +static noinline_for_stack int +stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct nlattr *tb[]) { struct rdma_hw_stats *stats; struct nlattr *table_attr; @@ -2349,8 +2350,9 @@ err: return ret; } -static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack, struct nlattr *tb[]) +static noinline_for_stack int +stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, struct nlattr *tb[]) { static enum rdma_nl_counter_mode mode; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 07c571c7b699..c5b686394760 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -80,9 +80,12 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt) { - struct scatterlist *sg; + unsigned long curr_len = 0; + dma_addr_t curr_base = ~0; unsigned long va, pgoff; + struct scatterlist *sg; dma_addr_t mask; + dma_addr_t end; int i; umem->iova = va = virt; @@ -107,17 +110,30 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, pgoff = umem->address & ~PAGE_MASK; for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { - /* Walk SGL and reduce max page size if VA/PA bits differ - * for any address. + /* If the current entry is physically contiguous with the previous + * one, no need to take its start addresses into consideration. */ - mask |= (sg_dma_address(sg) + pgoff) ^ va; + if (check_add_overflow(curr_base, curr_len, &end) || + end != sg_dma_address(sg)) { + + curr_base = sg_dma_address(sg); + curr_len = 0; + + /* Reduce max page size if VA/PA bits differ */ + mask |= (curr_base + pgoff) ^ va; + + /* The alignment of any VA matching a discontinuity point + * in the physical memory sets the maximum possible page + * size as this must be a starting point of a new page that + * needs to be aligned. + */ + if (i != 0) + mask |= va; + } + + curr_len += sg_dma_len(sg); va += sg_dma_len(sg) - pgoff; - /* Except for the last entry, the ending iova alignment sets - * the maximum possible page size as the low bits of the iova - * must be zero when starting the next chunk. - */ - if (i != (umem->sgt_append.sgt.nents - 1)) - mask |= va; + pgoff = 0; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index edef79daed3f..535bb99ed9f5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -718,8 +718,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) goto err_free; pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); - if (!pd) { - ret = -EINVAL; + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); goto err_free; } @@ -809,8 +809,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) if (cmd.flags & IB_MR_REREG_PD) { new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); - if (!new_pd) { - ret = -EINVAL; + if (IS_ERR(new_pd)) { + ret = PTR_ERR(new_pd); goto put_uobjs; } } else { @@ -919,8 +919,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) return PTR_ERR(uobj); pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); - if (!pd) { - ret = -EINVAL; + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); goto err_free; } @@ -1127,8 +1127,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) return ret; cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); - if (!cq) - return -EINVAL; + if (IS_ERR(cq)) + return PTR_ERR(cq); ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata); if (ret) @@ -1189,8 +1189,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) return ret; cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); - if (!cq) - return -EINVAL; + if (IS_ERR(cq)) + return PTR_ERR(cq); /* we copy a struct ib_uverbs_poll_cq_resp to user space */ header_ptr = attrs->ucore.outbuf; @@ -1238,8 +1238,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs) return ret; cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); - if (!cq) - return -EINVAL; + if (IS_ERR(cq)) + return PTR_ERR(cq); ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); @@ -1321,8 +1321,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, cmd->rwq_ind_tbl_handle, attrs); - if (!ind_tbl) { - ret = -EINVAL; + if (IS_ERR(ind_tbl)) { + ret = PTR_ERR(ind_tbl); goto err_put; } @@ -1360,8 +1360,10 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (cmd->is_srq) { srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle, attrs); - if (!srq || srq->srq_type == IB_SRQT_XRC) { - ret = -EINVAL; + if (IS_ERR(srq) || + srq->srq_type == IB_SRQT_XRC) { + ret = IS_ERR(srq) ? PTR_ERR(srq) : + -EINVAL; goto err_put; } } @@ -1371,23 +1373,29 @@ static int create_qp(struct uverbs_attr_bundle *attrs, rcq = uobj_get_obj_read( cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle, attrs); - if (!rcq) { - ret = -EINVAL; + if (IS_ERR(rcq)) { + ret = PTR_ERR(rcq); goto err_put; } } } } - if (has_sq) + if (has_sq) { scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, attrs); + if (IS_ERR(scq)) { + ret = PTR_ERR(scq); + goto err_put; + } + } + if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI) rcq = rcq ?: scq; pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); - if (!pd || (!scq && has_sq)) { - ret = -EINVAL; + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); goto err_put; } @@ -1482,18 +1490,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs, err_put: if (!IS_ERR(xrcd_uobj)) uobj_put_read(xrcd_uobj); - if (pd) + if (!IS_ERR_OR_NULL(pd)) uobj_put_obj_read(pd); - if (scq) + if (!IS_ERR_OR_NULL(scq)) rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, UVERBS_LOOKUP_READ); - if (rcq && rcq != scq) + if (!IS_ERR_OR_NULL(rcq) && rcq != scq) rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, UVERBS_LOOKUP_READ); - if (srq) + if (!IS_ERR_OR_NULL(srq)) rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, UVERBS_LOOKUP_READ); - if (ind_tbl) + if (!IS_ERR_OR_NULL(ind_tbl)) uobj_put_obj_read(ind_tbl); uobj_alloc_abort(&obj->uevent.uobject, attrs); @@ -1655,8 +1663,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs) } qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) { - ret = -EINVAL; + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); goto out; } @@ -1761,8 +1769,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs, qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, attrs); - if (!qp) { - ret = -EINVAL; + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); goto out; } @@ -2028,8 +2036,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) return -ENOMEM; qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) { - ret = -EINVAL; + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); goto out; } @@ -2066,9 +2074,9 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah, attrs); - if (!ud->ah) { + if (IS_ERR(ud->ah)) { + ret = PTR_ERR(ud->ah); kfree(ud); - ret = -EINVAL; goto out_put; } ud->remote_qpn = user_wr->wr.ud.remote_qpn; @@ -2305,8 +2313,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs) return PTR_ERR(wr); qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) { - ret = -EINVAL; + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); goto out; } @@ -2356,8 +2364,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs) return PTR_ERR(wr); srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); - if (!srq) { - ret = -EINVAL; + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); goto out; } @@ -2413,8 +2421,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) } pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); - if (!pd) { - ret = -EINVAL; + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); goto err; } @@ -2483,8 +2491,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) return ret; qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) - return -EINVAL; + if (IS_ERR(qp)) + return PTR_ERR(qp); obj = qp->uobject; @@ -2533,8 +2541,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) return ret; qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) - return -EINVAL; + if (IS_ERR(qp)) + return PTR_ERR(qp); obj = qp->uobject; mutex_lock(&obj->mcast_lock); @@ -2668,8 +2676,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs, UVERBS_OBJECT_FLOW_ACTION, kern_spec->action.handle, attrs); - if (!ib_spec->action.act) - return -EINVAL; + if (IS_ERR(ib_spec->action.act)) + return PTR_ERR(ib_spec->action.act); ib_spec->action.size = sizeof(struct ib_flow_spec_action_handle); flow_resources_add(uflow_res, @@ -2686,8 +2694,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs, UVERBS_OBJECT_COUNTERS, kern_spec->flow_count.handle, attrs); - if (!ib_spec->flow_count.counters) - return -EINVAL; + if (IS_ERR(ib_spec->flow_count.counters)) + return PTR_ERR(ib_spec->flow_count.counters); ib_spec->flow_count.size = sizeof(struct ib_flow_spec_action_count); flow_resources_add(uflow_res, @@ -2905,14 +2913,14 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) return PTR_ERR(obj); pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs); - if (!pd) { - err = -EINVAL; + if (IS_ERR(pd)) { + err = PTR_ERR(pd); goto err_uobj; } cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); - if (!cq) { - err = -EINVAL; + if (IS_ERR(cq)) { + err = PTR_ERR(cq); goto err_put_pd; } @@ -3013,8 +3021,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) return -EINVAL; wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs); - if (!wq) - return -EINVAL; + if (IS_ERR(wq)) + return PTR_ERR(wq); if (cmd.attr_mask & IB_WQ_FLAGS) { wq_attr.flags = cmd.flags; @@ -3097,8 +3105,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) num_read_wqs++) { wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs], attrs); - if (!wq) { - err = -EINVAL; + if (IS_ERR(wq)) { + err = PTR_ERR(wq); goto put_wqs; } @@ -3253,8 +3261,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) } qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) { - err = -EINVAL; + if (IS_ERR(qp)) { + err = PTR_ERR(qp); goto err_uobj; } @@ -3400,15 +3408,15 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, if (ib_srq_has_cq(cmd->srq_type)) { attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle, attrs); - if (!attr.ext.cq) { - ret = -EINVAL; + if (IS_ERR(attr.ext.cq)) { + ret = PTR_ERR(attr.ext.cq); goto err_put_xrcd; } } pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs); - if (!pd) { - ret = -EINVAL; + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); goto err_put_cq; } @@ -3515,8 +3523,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs) return ret; srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); - if (!srq) - return -EINVAL; + if (IS_ERR(srq)) + return PTR_ERR(srq); attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; @@ -3543,8 +3551,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs) return ret; srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs); - if (!srq) - return -EINVAL; + if (IS_ERR(srq)) + return PTR_ERR(srq); ret = ib_query_srq(srq, &attr); @@ -3669,8 +3677,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) return -EOPNOTSUPP; cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs); - if (!cq) - return -EINVAL; + if (IS_ERR(cq)) + return PTR_ERR(cq); ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 473ee0831307..dc40001072a5 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -3109,22 +3109,23 @@ EXPORT_SYMBOL(__rdma_block_iter_start); bool __rdma_block_iter_next(struct ib_block_iter *biter) { unsigned int block_offset; - unsigned int sg_delta; + unsigned int delta; if (!biter->__sg_nents || !biter->__sg) return false; biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); - sg_delta = BIT_ULL(biter->__pg_bit) - block_offset; + delta = BIT_ULL(biter->__pg_bit) - block_offset; - if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) { - biter->__sg_advance += sg_delta; - } else { + while (biter->__sg_nents && biter->__sg && + sg_dma_len(biter->__sg) - biter->__sg_advance <= delta) { + delta -= sg_dma_len(biter->__sg) - biter->__sg_advance; biter->__sg_advance = 0; biter->__sg = sg_next(biter->__sg); biter->__sg_nents--; } + biter->__sg_advance += delta; return true; } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0b21d8b5d962..4a3ce61a3bba 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -4642,7 +4642,7 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bund return err; err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET, - &offset, sizeof(length)); + &offset, sizeof(offset)); if (err) return err; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 51d619edb6c5..e56ba86d460e 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -597,7 +597,8 @@ err_free_mtt: static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt) { - dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE); + dma_unmap_sg(&dev->pdev->dev, mtt->sglist, + DIV_ROUND_UP(mtt->size, PAGE_SIZE), DMA_TO_DEVICE); vfree(mtt->sglist); } diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 7ead8746b79b..f2c530ab85a5 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -964,31 +964,35 @@ static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask, struct hfi1_affinity_node_list *affinity) { int possible, curr_cpu, i; - uint num_cores_per_socket = node_affinity.num_online_cpus / + uint num_cores_per_socket; + + cpumask_copy(hw_thread_mask, &affinity->proc.mask); + + if (affinity->num_core_siblings == 0) + return; + + num_cores_per_socket = node_affinity.num_online_cpus / affinity->num_core_siblings / node_affinity.num_online_nodes; - cpumask_copy(hw_thread_mask, &affinity->proc.mask); - if (affinity->num_core_siblings > 0) { - /* Removing other siblings not needed for now */ - possible = cpumask_weight(hw_thread_mask); - curr_cpu = cpumask_first(hw_thread_mask); - for (i = 0; - i < num_cores_per_socket * node_affinity.num_online_nodes; - i++) - curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); - - for (; i < possible; i++) { - cpumask_clear_cpu(curr_cpu, hw_thread_mask); - curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); - } + /* Removing other siblings not needed for now */ + possible = cpumask_weight(hw_thread_mask); + curr_cpu = cpumask_first(hw_thread_mask); + for (i = 0; + i < num_cores_per_socket * node_affinity.num_online_nodes; + i++) + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); - /* Identifying correct HW threads within physical cores */ - cpumask_shift_left(hw_thread_mask, hw_thread_mask, - num_cores_per_socket * - node_affinity.num_online_nodes * - hw_thread_no); + for (; i < possible; i++) { + cpumask_clear_cpu(curr_cpu, hw_thread_mask); + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); } + + /* Identifying correct HW threads within physical cores */ + cpumask_shift_left(hw_thread_mask, hw_thread_mask, + num_cores_per_socket * + node_affinity.num_online_nodes * + hw_thread_no); } int hfi1_get_proc_affinity(int node) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 4fc5b9d5fea8..307c35888b30 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -33,7 +33,6 @@ #include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> -#include "hnae3.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 560a1d9de408..cbe73d9ad525 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -856,6 +856,7 @@ struct hns_roce_caps { u16 default_ceq_arm_st; u8 cong_cap; enum hns_roce_cong_type default_cong_type; + u32 max_ack_req_msg_len; }; enum hns_roce_device_state { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index ca0798224e56..3d479c63b117 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -249,15 +249,12 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, } static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, - unsigned long hem_alloc_size, - gfp_t gfp_mask) + unsigned long hem_alloc_size) { struct hns_roce_hem *hem; int order; void *buf; - WARN_ON(gfp_mask & __GFP_HIGHMEM); - order = get_order(hem_alloc_size); if (PAGE_SIZE << order != hem_alloc_size) { dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n", @@ -265,13 +262,12 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, return NULL; } - hem = kmalloc(sizeof(*hem), - gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + hem = kmalloc(sizeof(*hem), GFP_KERNEL); if (!hem) return NULL; buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size, - &hem->dma, gfp_mask); + &hem->dma, GFP_KERNEL); if (!buf) goto fail; @@ -378,7 +374,6 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, { u32 bt_size = mhop->bt_chunk_size; struct device *dev = hr_dev->dev; - gfp_t flag; u64 bt_ba; u32 size; int ret; @@ -417,8 +412,7 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, * alloc bt space chunk for MTT/CQE. */ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size; - flag = GFP_KERNEL | __GFP_NOWARN; - table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size, flag); + table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size); if (!table->hem[index->buf]) { ret = -ENOMEM; goto err_alloc_hem; @@ -546,9 +540,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, goto out; } - table->hem[i] = hns_roce_alloc_hem(hr_dev, - table->table_chunk_size, - GFP_KERNEL | __GFP_NOWARN); + table->hem[i] = hns_roce_alloc_hem(hr_dev, table->table_chunk_size); if (!table->hem[i]) { ret = -ENOMEM; goto out; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f5c3e560df58..53fe0ef3883d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -43,7 +43,6 @@ #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -943,7 +942,7 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) static void update_srq_db(struct hns_roce_srq *srq) { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - struct hns_roce_v2_db db; + struct hns_roce_v2_db db = {}; hr_reg_write(&db, DB_TAG, srq->srqn); hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB); @@ -2182,31 +2181,36 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) { - struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM]; + struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM] = {}; struct hns_roce_caps *caps = &hr_dev->caps; struct hns_roce_query_pf_caps_a *resp_a; struct hns_roce_query_pf_caps_b *resp_b; struct hns_roce_query_pf_caps_c *resp_c; struct hns_roce_query_pf_caps_d *resp_d; struct hns_roce_query_pf_caps_e *resp_e; + struct hns_roce_query_pf_caps_f *resp_f; enum hns_roce_opcode_type cmd; int ctx_hop_num; int pbl_hop_num; + int cmd_num; int ret; int i; cmd = hr_dev->is_vf ? HNS_ROCE_OPC_QUERY_VF_CAPS_NUM : HNS_ROCE_OPC_QUERY_PF_CAPS_NUM; + cmd_num = hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ? + HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 : + HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; - for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) { + for (i = 0; i < cmd_num - 1; i++) { hns_roce_cmq_setup_basic_desc(&desc[i], cmd, true); - if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1)) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); } - ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM); + hns_roce_cmq_setup_basic_desc(&desc[cmd_num - 1], cmd, true); + desc[cmd_num - 1].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + ret = hns_roce_cmq_send(hr_dev, desc, cmd_num); if (ret) return ret; @@ -2215,6 +2219,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data; resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data; resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data; + resp_f = (struct hns_roce_query_pf_caps_f *)desc[5].data; caps->local_ca_ack_delay = resp_a->local_ca_ack_delay; caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); @@ -2279,6 +2284,8 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS); caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS); + caps->max_ack_req_msg_len = le32_to_cpu(resp_f->max_ack_req_msg_len); + caps->qpc_hop_num = ctx_hop_num; caps->sccc_hop_num = ctx_hop_num; caps->srqc_hop_num = ctx_hop_num; @@ -2972,14 +2979,22 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) { int ret; + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + ret = free_mr_init(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to init free mr!\n"); + return ret; + } + } + /* The hns ROCEE requires the extdb info to be cleared before using */ ret = hns_roce_clear_extdb_list_info(hr_dev); if (ret) - return ret; + goto err_clear_extdb_failed; ret = get_hem_table(hr_dev); if (ret) - return ret; + goto err_get_hem_table_failed; if (hr_dev->is_vf) return 0; @@ -2994,6 +3009,11 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) err_llm_init_failed: put_hem_table(hr_dev); +err_get_hem_table_failed: + hns_roce_function_clear(hr_dev); +err_clear_extdb_failed: + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_exit(hr_dev); return ret; } @@ -4547,7 +4567,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu ib_mtu; + u8 ack_req_freq; const u8 *smac; + int lp_msg_len; u8 lp_pktn_ini; u64 *mtts; u8 *dmac; @@ -4630,7 +4652,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; #define MIN_LP_MSG_LEN 1024 /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */ - lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu); + lp_msg_len = max(mtu, MIN_LP_MSG_LEN); + lp_pktn_ini = ilog2(lp_msg_len / mtu); if (attr_mask & IB_QP_PATH_MTU) { hr_reg_write(context, QPC_MTU, ib_mtu); @@ -4640,8 +4663,22 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini); hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI); - /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */ - hr_reg_write(context, QPC_ACK_REQ_FREQ, lp_pktn_ini); + /* + * There are several constraints for ACK_REQ_FREQ: + * 1. mtu * (2 ^ ACK_REQ_FREQ) should not be too large, otherwise + * it may cause some unexpected retries when sending large + * payload. + * 2. ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI. + * 3. ACK_REQ_FREQ must be equal to LP_PKTN_INI when using LDCP + * or HC3 congestion control algorithm. + */ + if (hr_qp->cong_type == CONG_TYPE_LDCP || + hr_qp->cong_type == CONG_TYPE_HC3 || + hr_dev->caps.max_ack_req_msg_len < lp_msg_len) + ack_req_freq = lp_pktn_ini; + else + ack_req_freq = ilog2(hr_dev->caps.max_ack_req_msg_len / mtu); + hr_reg_write(context, QPC_ACK_REQ_FREQ, ack_req_freq); hr_reg_clear(qpc_mask, QPC_ACK_REQ_FREQ); hr_reg_clear(qpc_mask, QPC_RX_REQ_PSN_ERR); @@ -5334,11 +5371,10 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_v2_qp_context ctx[2]; - struct hns_roce_v2_qp_context *context = ctx; - struct hns_roce_v2_qp_context *qpc_mask = ctx + 1; + struct hns_roce_v2_qp_context *context; + struct hns_roce_v2_qp_context *qpc_mask; struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = -ENOMEM; if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; @@ -5349,7 +5385,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. */ - memset(context, 0, hr_dev->caps.qpc_sz); + context = kvzalloc(sizeof(*context), GFP_KERNEL); + qpc_mask = kvzalloc(sizeof(*qpc_mask), GFP_KERNEL); + if (!context || !qpc_mask) + goto out; + memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz); ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state, @@ -5391,6 +5431,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, clear_qp(hr_qp); out: + kvfree(qpc_mask); + kvfree(context); return ret; } @@ -7028,21 +7070,11 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_roce_init; } - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { - ret = free_mr_init(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "failed to init free mr!\n"); - goto error_failed_free_mr_init; - } - } handle->priv = hr_dev; return 0; -error_failed_free_mr_init: - hns_roce_exit(hr_dev); - error_failed_roce_init: kfree(hr_dev->priv); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 91a5665465ff..1c2660305d27 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_HW_V2_H #include <linux/bitops.h> +#include "hnae3.h" #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 @@ -1167,7 +1168,8 @@ struct hns_roce_cfg_gmv_tb_b { #define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32) #define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64) -#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 +#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 5 +#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 6 struct hns_roce_query_pf_caps_a { u8 number_ports; u8 local_ca_ack_delay; @@ -1279,6 +1281,11 @@ struct hns_roce_query_pf_caps_e { __le16 aeq_period; }; +struct hns_roce_query_pf_caps_f { + __le32 max_ack_req_msg_len; + __le32 rsv[5]; +}; + #define PF_CAPS_E_FIELD_LOC(h, l) \ FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8d0b63d4b50a..11fa64044a8d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -37,7 +37,6 @@ #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" @@ -948,10 +947,7 @@ err_unmap_dmpt: static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) { hns_roce_cleanup_bitmap(hr_dev); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) - mutex_destroy(&hr_dev->pgdir_mutex); + mutex_destroy(&hr_dev->pgdir_mutex); } /** @@ -966,11 +962,11 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_dev->sm_lock); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { - INIT_LIST_HEAD(&hr_dev->pgdir_list); - mutex_init(&hr_dev->pgdir_mutex); - } + INIT_LIST_HEAD(&hr_dev->qp_list); + spin_lock_init(&hr_dev->qp_list_lock); + + INIT_LIST_HEAD(&hr_dev->pgdir_list); + mutex_init(&hr_dev->pgdir_mutex); hns_roce_init_uar_table(hr_dev); @@ -1002,9 +998,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) err_uar_table_free: ida_destroy(&hr_dev->uar_ida.ida); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) - mutex_destroy(&hr_dev->pgdir_mutex); + mutex_destroy(&hr_dev->pgdir_mutex); return ret; } @@ -1133,9 +1127,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) } } - INIT_LIST_HEAD(&hr_dev->qp_list); - spin_lock_init(&hr_dev->qp_list_lock); - ret = hns_roce_register_device(hr_dev); if (ret) goto error_failed_register_device; diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 356d98816949..f637b73b946e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -4,7 +4,6 @@ #include <rdma/rdma_cm.h> #include <rdma/restrack.h> #include <uapi/rdma/rdma_netlink.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 73d67c853b6f..48fef989318b 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -561,7 +561,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, req.ah_attr.dest_port = ROCE_V2_UDP_DPORT; req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, ibqp->qp_num, attr->dest_qp_num); - req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class; + req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2; req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit; } diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 81cfa74147a1..ad6c195d077b 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -391,7 +391,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, return ret; /* We don't expose device counters over Vports */ - if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) + if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) goto done; if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { @@ -411,7 +411,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, */ goto done; } - ret = mlx5_lag_query_cong_counters(dev->mdev, + ret = mlx5_lag_query_cong_counters(mdev, stats->value + cnts->num_q_counters, cnts->num_cong_counters, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 69999d8d24f3..f49f78b69ab9 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1914,6 +1914,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, /* Level1 is valid for future use, no need to free */ return -ENOMEM; + INIT_LIST_HEAD(&obj_event->obj_sub_list); err = xa_insert(&event->object_ids, key_level2, obj_event, @@ -1922,7 +1923,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, kfree(obj_event); return err; } - INIT_LIST_HEAD(&obj_event->obj_sub_list); } return 0; diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index b4c97fb62abf..9ded2b7c1e31 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -282,7 +282,7 @@ static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx, int err; u64 address; - if (!MLX5_CAP_DEV_MEM(dm_db->dev, memic)) + if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic)) return ERR_PTR(-EOPNOTSUPP); dm = kzalloc(sizeof(*dm), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8c47cb4edd0a..435c456a4fd5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1766,6 +1766,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, context->devx_uid); } +static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int err; + + err = mlx5_nic_vport_update_local_lb(master, true); + if (err) + return err; + + err = mlx5_nic_vport_update_local_lb(slave, true); + if (err) + goto out; + + return 0; + +out: + mlx5_nic_vport_update_local_lb(master, false); + return err; +} + +static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + mlx5_nic_vport_update_local_lb(slave, false); + mlx5_nic_vport_update_local_lb(master, false); +} + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { int err = 0; @@ -3448,6 +3475,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev); + mlx5_core_mp_event_replay(ibdev->mdev, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, NULL); @@ -3543,6 +3572,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); + err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + return true; unbind: diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 068eac3bdb50..726b81b6330c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1968,7 +1968,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, if (mr->mmkey.cache_ent) { spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - mr->mmkey.cache_ent->in_use--; goto end; } @@ -2029,32 +2028,62 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) } } -static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) +static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; - bool is_odp = is_odp_mr(mr); bool is_odp_dma_buf = is_dmabuf_mr(mr) && - !to_ib_umem_dmabuf(mr->umem)->pinned; - int ret = 0; + !to_ib_umem_dmabuf(mr->umem)->pinned; + bool is_odp = is_odp_mr(mr); + int ret; if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); if (is_odp_dma_buf) - dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); + + ret = mlx5r_umr_revoke_mr(mr); - if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } + + return ret; +} + +static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) +{ + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); + bool from_cache = !!ent; + int ret; + + if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && + !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); + if (from_cache) + ent->in_use--; if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { mod_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(30 * 1000)); ent->tmp_cleanup_scheduled = true; } spin_unlock_irq(&ent->mkeys_queue.lock); - goto out; + return 0; } if (ent) { @@ -2063,8 +2092,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) mr->mmkey.cache_ent = NULL; spin_unlock_irq(&ent->mkeys_queue.lock); } + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); ret = destroy_mkey(dev, mr); -out: if (is_odp) { if (!ret) to_ib_umem_odp(mr->umem)->private = NULL; @@ -2074,9 +2109,9 @@ out: if (is_odp_dma_buf) { if (!ret) to_ib_umem_dmabuf(mr->umem)->private = NULL; - dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); } - return ret; } @@ -2125,7 +2160,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) } /* Stop DMA */ - rc = mlx5_revoke_mr(mr); + rc = mlx5r_handle_mkey_cleanup(mr); if (rc) return rc; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e158d5b1ab17..98a76c9db7ab 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -247,8 +247,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) } if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) - __xa_erase(&mr_to_mdev(mr)->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key)); xa_unlock(&imr->implicit_children); /* Freeing a MR is a sleeping operation, so bounce to a work queue */ @@ -521,8 +521,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, } if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) { - ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL); + ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL); if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); __xa_erase(&imr->implicit_children, idx); diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index d3dcc272200a..146d03ae40bd 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); - if (common) + if (common && !common->invalid) refcount_inc(&common->refcount); + else + common = NULL; spin_unlock_irqrestore(&table->lock, flags); @@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev, return 0; } +static void modify_resource_common_state(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *qp, + bool invalid) +{ + struct mlx5_qp_table *table = &dev->qp_table; + unsigned long flags; + + spin_lock_irqsave(&table->lock, flags); + qp->common.invalid = invalid; + spin_unlock_irqrestore(&table->lock, flags); +} + static void destroy_resource_common(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp) { @@ -609,8 +623,20 @@ err_destroy_rq: int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, struct mlx5_core_qp *rq) { + int ret; + + /* The rq destruction can be called again in case it fails, hence we + * mark the common resource as invalid and only once FW destruction + * is completed successfully we actually destroy the resources. + */ + modify_resource_common_state(dev, rq, true); + ret = destroy_rq_tracked(dev, rq->qpn, rq->uid); + if (ret) { + modify_resource_common_state(dev, rq, false); + return ret; + } destroy_resource_common(dev, rq); - return destroy_rq_tracked(dev, rq->qpn, rq->uid); + return 0; } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 793f3c5c4d01..80c665d15218 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -32,13 +32,15 @@ static __be64 get_umr_disable_mr_mask(void) return cpu_to_be64(result); } -static __be64 get_umr_update_translation_mask(void) +static __be64 get_umr_update_translation_mask(struct mlx5_ib_dev *dev) { u64 result; result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR; + if (MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5)) + result |= MLX5_MKEY_MASK_PAGE_SIZE_5; return cpu_to_be64(result); } @@ -654,7 +656,7 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev, flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR; if (update_translation) { - wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(); + wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(dev); if (!mr->ibmr.length) MLX5_SET(mkc, &wqe->mkey_seg, length64, 1); } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index fec87c9030ab..fffd144d509e 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); - if (err) { - vfree(cq->queue->buf); - kfree(cq->queue); + if (err) return err; - } cq->is_user = uresp; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 91d329e90308..8b805b16136e 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -811,7 +811,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work) spin_unlock_irqrestore(&qp->state_lock, flags); qp->qp_timeout_jiffies = 0; - if (qp_type(qp) == IB_QPT_RC) { + /* In the function timer_setup, .function is initialized. If .function + * is NULL, it indicates the function timer_setup is not called, the + * timer is not initialized. Or else, the timer is initialized. + */ + if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function && + qp->rnr_nak_timer.function) { del_timer_sync(&qp->retrans_timer); del_timer_sync(&qp->rnr_nak_timer); } diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index a034264c5669..43ff1afd3d01 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -334,18 +334,17 @@ static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, if (!sendpage_ok(page[i])) msg.msg_flags &= ~MSG_SPLICE_PAGES; bvec_set_page(&bvec, page[i], bytes, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, bytes); try_page_again: lock_sock(sk); - rv = tcp_sendmsg_locked(sk, &msg, size); + rv = tcp_sendmsg_locked(sk, &msg, bytes); release_sock(sk); if (rv > 0) { size -= rv; sent += rv; if (rv != bytes) { - offset += rv; bytes -= rv; goto try_page_again; } |