From 26312973bfbc1db24b157797776ee2b5b48f5c50 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Thu, 6 Oct 2022 12:14:56 -0400 Subject: IB/uverbs: fix the typo of optional Fix the typo of optional in the function of UVERBS_HANDLER. Signed-off-by: Deming Wang Link: https://lore.kernel.org/r/20221006161456.2998-1-wangdeming@inspur.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index dd1075466f61..7b4773fa4bc0 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -163,7 +163,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( UVERBS_ATTR_CREATE_QP_SRQ_HANDLE)) return -EINVAL; - /* send_cq is optinal */ + /* send_cq is optional */ if (cap.max_send_wr) { send_cq = uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE); -- cgit v1.2.3 From c4bb733234b0ffd939030bb592b691ac19519455 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sat, 22 Oct 2022 14:00:30 +0800 Subject: RDMA/core: fix repeated words in comments Delete the redundant word 'the'. Signed-off-by: wangjianli Link: https://lore.kernel.org/r/20221022060030.50900-1-wangjianli@cdjrlc.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 4084d05a4510..2e91d8879326 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1422,7 +1422,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, *vlan_id = vlan_dev_vlan_id(ndev); } else { /* If the netdev is upper device and if it's lower - * device is vlan device, consider vlan id of the + * device is vlan device, consider vlan id of * the lower vlan device for this gid entry. */ netdev_walk_all_lower_dev_rcu(attr->ndev, -- cgit v1.2.3 From 4508d32ccced24c972bc4592104513e1ff8439b5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 25 Oct 2022 10:37:13 +0300 Subject: RDMA/core: Fix order of nldev_exit call Create symmetrical exit flow by calling to nldev_exit() after call to rdma_nl_unregister(RDMA_NL_LS). Fixes: 6c80b41abe22 ("RDMA/netlink: Add nldev initialization flows") Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/64e676774a53a406f4cde265d5a4cfd6b8e97df9.1666683334.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index ae60c73babcc..3409c55ea88b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2843,8 +2843,8 @@ err: static void __exit ib_core_cleanup(void) { roce_gid_mgmt_cleanup(); - nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); + nldev_exit(); unregister_pernet_device(&rdma_dev_net_ops); unregister_blocking_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); -- cgit v1.2.3 From 5c20311d76cbaeb7ed2ecf9c8b8322f8fc4a7ae3 Mon Sep 17 00:00:00 2001 From: Leonid Ravich Date: Wed, 9 Nov 2022 11:57:17 +0200 Subject: IB/mad: Don't call to function that might sleep while in atomic context Tracepoints are not allowed to sleep, as such the following splat is generated due to call to ib_query_pkey() in atomic context. WARNING: CPU: 0 PID: 1888000 at kernel/trace/ring_buffer.c:2492 rb_commit+0xc1/0x220 CPU: 0 PID: 1888000 Comm: kworker/u9:0 Kdump: loaded Tainted: G OE --------- - - 4.18.0-305.3.1.el8.x86_64 #1 Hardware name: Red Hat KVM, BIOS 1.13.0-2.module_el8.3.0+555+a55c8938 04/01/2014 Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] RIP: 0010:rb_commit+0xc1/0x220 RSP: 0000:ffffa8ac80f9bca0 EFLAGS: 00010202 RAX: ffff8951c7c01300 RBX: ffff8951c7c14a00 RCX: 0000000000000246 RDX: ffff8951c707c000 RSI: ffff8951c707c57c RDI: ffff8951c7c14a00 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8951c7c01300 R11: 0000000000000001 R12: 0000000000000246 R13: 0000000000000000 R14: ffffffff964c70c0 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8951fbc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f20e8f39010 CR3: 000000002ca10005 CR4: 0000000000170ef0 Call Trace: ring_buffer_unlock_commit+0x1d/0xa0 trace_buffer_unlock_commit_regs+0x3b/0x1b0 trace_event_buffer_commit+0x67/0x1d0 trace_event_raw_event_ib_mad_recv_done_handler+0x11c/0x160 [ib_core] ib_mad_recv_done+0x48b/0xc10 [ib_core] ? trace_event_raw_event_cq_poll+0x6f/0xb0 [ib_core] __ib_process_cq+0x91/0x1c0 [ib_core] ib_cq_poll_work+0x26/0x80 [ib_core] process_one_work+0x1a7/0x360 ? create_worker+0x1a0/0x1a0 worker_thread+0x30/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x116/0x130 ? kthread_flush_work_fn+0x10/0x10 ret_from_fork+0x35/0x40 ---[ end trace 78ba8509d3830a16 ]--- Fixes: 821bf1de45a1 ("IB/MAD: Add recv path trace point") Signed-off-by: Leonid Ravich Link: https://lore.kernel.org/r/Y2t5feomyznrVj7V@leonid-Inspiron-3421 Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/mad.c | 5 ----- include/trace/events/ib_mad.h | 13 ++++--------- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1893aa613ad7..674344eb8e2f 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_qp_info *qp_info, struct trace_event_raw_ib_mad_send_template *entry) { - u16 pkey; - struct ib_device *dev = qp_info->port_priv->device; - u32 pnum = qp_info->port_priv->port_num; struct ib_ud_wr *wr = &mad_send_wr->send_wr; struct rdma_ah_attr attr = {}; @@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, /* These are common */ entry->sl = attr.sl; - ib_query_pkey(dev, pnum, wr->pkey_index, &pkey); - entry->pkey = pkey; entry->rqpn = wr->remote_qpn; entry->rqkey = wr->remote_qkey; entry->dlid = rdma_ah_get_dlid(&attr); diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h index 59363a083ecb..d92691c78cff 100644 --- a/include/trace/events/ib_mad.h +++ b/include/trace/events/ib_mad.h @@ -49,7 +49,6 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, __field(int, retries_left) __field(int, max_retries) __field(int, retry) - __field(u16, pkey) ), TP_fast_assign( @@ -89,7 +88,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \ "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \ "attr_id 0x%x attr_mod 0x%x => dlid 0x%08x sl %d "\ - "pkey 0x%x rpqn 0x%x rqpkey 0x%x", + "rpqn 0x%x rqpkey 0x%x", __entry->dev_index, __entry->port_num, __entry->qp_num, __entry->agent_priv, be64_to_cpu(__entry->wrtid), __entry->retries_left, __entry->max_retries, @@ -100,7 +99,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template, be16_to_cpu(__entry->class_specific), be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), be32_to_cpu(__entry->attr_mod), - be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, + be32_to_cpu(__entry->dlid), __entry->sl, __entry->rqpn, __entry->rqkey ) ); @@ -204,7 +203,6 @@ TRACE_EVENT(ib_mad_recv_done_handler, __field(u16, wc_status) __field(u32, slid) __field(u32, dev_index) - __field(u16, pkey) ), TP_fast_assign( @@ -224,9 +222,6 @@ TRACE_EVENT(ib_mad_recv_done_handler, __entry->slid = wc->slid; __entry->src_qp = wc->src_qp; __entry->sl = wc->sl; - ib_query_pkey(qp_info->port_priv->device, - qp_info->port_priv->port_num, - wc->pkey_index, &__entry->pkey); __entry->wc_status = wc->status; ), @@ -234,7 +229,7 @@ TRACE_EVENT(ib_mad_recv_done_handler, "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \ "method 0x%02x status 0x%04x class_specific 0x%04x " \ "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \ - "slid 0x%08x src QP%d, sl %d pkey 0x%04x", + "slid 0x%08x src QP%d, sl %d", __entry->dev_index, __entry->port_num, __entry->qp_num, __entry->wc_status, __entry->length, @@ -244,7 +239,7 @@ TRACE_EVENT(ib_mad_recv_done_handler, be16_to_cpu(__entry->class_specific), be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), be32_to_cpu(__entry->attr_mod), - __entry->slid, __entry->src_qp, __entry->sl, __entry->pkey + __entry->slid, __entry->src_qp, __entry->sl ) ); -- cgit v1.2.3 From dac153f2802db1ad46207283cb9b2aae3d707a45 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 7 Nov 2022 10:51:34 +0200 Subject: RDMA/restrack: Release MR restrack when delete The MR restrack also needs to be released when delete it, otherwise it cause memory leak as the task struct won't be released. Fixes: 13ef5539def7 ("RDMA/restrack: Count references to the verbs objects") Signed-off-by: Mark Zhang Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/703db18e8d4ef628691fb93980a709be673e62e3.1667810736.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/restrack.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 1f935d9f6178..01a499a8b88d 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) rt = &dev->res[res->type]; old = xa_erase(&rt->xa, res->id); - if (res->type == RDMA_RESTRACK_MR) - return; WARN_ON(old != res); out: -- cgit v1.2.3 From 5e15ff29b156bbbdeadae230c8ecd5ecd8ca2477 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 7 Nov 2022 10:51:35 +0200 Subject: RDMA/core: Make sure "ib_port" is valid when access sysfs node The "ib_port" structure must be set before adding the sysfs kobject, and reset after removing it, otherwise it may crash when accessing the sysfs node: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050 Mem abort info: ESR = 0x96000006 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000006 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e85f5ba5 [0000000000000050] pgd=0000000848fd9003, pud=000000085b387003, pmd=0000000000000000 Internal error: Oops: 96000006 [#2] PREEMPT SMP Modules linked in: ib_umad(O) mlx5_ib(O) nfnetlink_cttimeout(E) nfnetlink(E) act_gact(E) cls_flower(E) sch_ingress(E) openvswitch(E) nsh(E) nf_nat_ipv6(E) nf_nat_ipv4(E) nf_conncount(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) mst_pciconf(O) ipmi_devintf(E) ipmi_msghandler(E) ipmb_dev_int(OE) mlx5_core(O) mlxfw(O) mlxdevm(O) auxiliary(O) ib_uverbs(O) ib_core(O) mlx_compat(O) psample(E) sbsa_gwdt(E) uio_pdrv_genirq(E) uio(E) mlxbf_pmc(OE) mlxbf_gige(OE) mlxbf_tmfifo(OE) gpio_mlxbf2(OE) pwr_mlxbf(OE) mlx_trio(OE) i2c_mlxbf(OE) mlx_bootctl(OE) bluefield_edac(OE) knem(O) ip_tables(E) ipv6(E) crc_ccitt(E) [last unloaded: mst_pci] Process grep (pid: 3372, stack limit = 0x0000000022055c92) CPU: 5 PID: 3372 Comm: grep Tainted: G D OE 4.19.161-mlnx.47.gadcd9e3 #1 Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS BlueField:3.9.2-15-ga2403ab Sep 8 2022 pstate: 40000005 (nZcv daif -PAN -UAO) pc : hw_stat_port_show+0x4c/0x80 [ib_core] lr : port_attr_show+0x40/0x58 [ib_core] sp : ffff000029f43b50 x29: ffff000029f43b50 x28: 0000000019375000 x27: ffff8007b821a540 x26: ffff000029f43e30 x25: 0000000000008000 x24: ffff000000eaa958 x23: 0000000000001000 x22: ffff8007a4ce3000 x21: ffff8007baff8000 x20: ffff8007b9066ac0 x19: ffff8007bae97578 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff8007a4ce4000 x7 : 0000000000000000 x6 : 000000000000003f x5 : ffff000000e6a280 x4 : ffff8007a4ce3000 x3 : 0000000000000000 x2 : aaaaaaaaaaaaaaab x1 : ffff8007b9066a10 x0 : ffff8007baff8000 Call trace: hw_stat_port_show+0x4c/0x80 [ib_core] port_attr_show+0x40/0x58 [ib_core] sysfs_kf_seq_show+0x8c/0x150 kernfs_seq_show+0x44/0x50 seq_read+0x1b4/0x45c kernfs_fop_read+0x148/0x1d8 __vfs_read+0x58/0x180 vfs_read+0x94/0x154 ksys_read+0x68/0xd8 __arm64_sys_read+0x28/0x34 el0_svc_common+0x88/0x18c el0_svc_handler+0x78/0x94 el0_svc+0x8/0xe8 Code: f2955562 aa1603e4 aa1503e0 f9405683 (f9402861) Fixes: d8a5883814b9 ("RDMA/core: Replace the ib_port_data hw_stats pointers with a ib_port pointer") Signed-off-by: Mark Zhang Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/88867e705c42c1cd2011e45201c25eecdb9fef94.1667810736.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/sysfs.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 84c53bd2a52d..ee59d7391568 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1213,6 +1213,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, p->port_num = port_num; kobject_init(&p->kobj, &port_type); + if (device->port_data && is_full_dev) + device->port_data[port_num].sysfs = p; + cur_group = p->groups_list; ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list, attr->gid_tbl_len, show_port_gid); @@ -1258,9 +1261,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, } list_add_tail(&p->kobj.entry, &coredev->port_list); - if (device->port_data && is_full_dev) - device->port_data[port_num].sysfs = p; - return p; err_groups: @@ -1268,6 +1268,8 @@ err_groups: err_del: kobject_del(&p->kobj); err_put: + if (device->port_data && is_full_dev) + device->port_data[port_num].sysfs = NULL; kobject_put(&p->kobj); return ERR_PTR(ret); } @@ -1276,14 +1278,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port) { bool is_full_dev = &port->ibdev->coredev == coredev; - if (port->ibdev->port_data && - port->ibdev->port_data[port->port_num].sysfs == port) - port->ibdev->port_data[port->port_num].sysfs = NULL; list_del(&port->kobj.entry); if (is_full_dev) sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups); + sysfs_remove_groups(&port->kobj, port->groups_list); kobject_del(&port->kobj); + + if (port->ibdev->port_data && + port->ibdev->port_data[port->port_num].sysfs == port) + port->ibdev->port_data[port->port_num].sysfs = NULL; + kobject_put(&port->kobj); } -- cgit v1.2.3 From ecacb3751f254572af0009b9501e2cdc83a30b6a Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 7 Nov 2022 10:51:36 +0200 Subject: RDMA/nldev: Return "-EAGAIN" if the cm_id isn't from expected port When filling a cm_id entry, return "-EAGAIN" instead of 0 if the cm_id doesn'the have the same port as requested, otherwise an incomplete entry may be returned, which causes "rdam res show cm_id" to return an error. For example on a machine with two rdma devices with "rping -C 1 -v -s" running background, the "rdma" command fails: $ rdma -V rdma utility, iproute2-5.19.0 $ rdma res show cm_id link mlx5_0/- cm-idn 0 state LISTEN ps TCP pid 28056 comm rping src-addr 0.0.0.0:7174 error: Protocol not available While with this fix it succeeds: $ rdma res show cm_id link mlx5_0/- cm-idn 0 state LISTEN ps TCP pid 26395 comm rping src-addr 0.0.0.0:7174 link mlx5_1/- cm-idn 0 state LISTEN ps TCP pid 26395 comm rping src-addr 0.0.0.0:7174 Fixes: 00313983cda6 ("RDMA/nldev: provide detailed CM_ID information") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/a08e898cdac5e28428eb749a99d9d981571b8ea7.1667810736.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index b92358f606d0..2be76a3fdd87 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -552,7 +552,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_cm_id *cm_id = &id_priv->id; if (port && port != cm_id->port_num) - return 0; + return -EAGAIN; if (cm_id->port_num && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) -- cgit v1.2.3 From 09f530f0c6d6689eee5e690c6d98f495fcc3a0f9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 23 Nov 2022 20:27:14 -0400 Subject: RDMA: Add netdevice_tracker to ib_device_set_netdev() This will cause an informative backtrace to print if the user of ib_device_set_netdev() isn't careful about tearing down the ibdevice before its the netdevice parent is destroyed. Such as like this: unregister_netdevice: waiting for vlan0 to become free. Usage count = 2 leaked reference. ib_device_set_netdev+0x266/0x730 siw_newlink+0x4e0/0xfd0 nldev_newlink+0x35c/0x5c0 rdma_nl_rcv_msg+0x36d/0x690 rdma_nl_rcv+0x2ee/0x430 netlink_unicast+0x543/0x7f0 netlink_sendmsg+0x918/0xe20 sock_sendmsg+0xcf/0x120 ____sys_sendmsg+0x70d/0x8b0 ___sys_sendmsg+0x11d/0x1b0 __sys_sendmsg+0xfa/0x1d0 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd This will help debug the issues syzkaller is seeing. Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-a7c81b3842ce+e5-netdev_tracker_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 6 ++++-- include/rdma/ib_verbs.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 3409c55ea88b..ff35cebb25e2 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2159,14 +2159,16 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, return 0; } + if (old_ndev) + netdev_tracker_free(ndev, &pdata->netdev_tracker); if (ndev) - dev_hold(ndev); + netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC); rcu_assign_pointer(pdata->netdev, ndev); spin_unlock_irqrestore(&pdata->netdev_lock, flags); add_ndev_hash(pdata); if (old_ndev) - dev_put(old_ndev); + __dev_put(old_ndev); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a1f4d53a4bb6..77dd9148815b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2203,6 +2203,7 @@ struct ib_port_data { struct ib_port_cache cache; struct net_device __rcu *netdev; + netdevice_tracker netdev_tracker; struct hlist_node ndev_hash_link; struct rdma_port_counter port_counter; struct ib_port *sysfs; -- cgit v1.2.3 From ea5ef136e215fdef35f14010bc51fcd6686e6922 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 26 Nov 2022 04:34:10 +0000 Subject: RDMA/nldev: Add checks for nla_nest_start() in fill_stat_counter_qps() As the nla_nest_start() may fail with NULL returned, the return value needs to be checked. Fixes: c4ffee7c9bdb ("RDMA/netlink: Implement counter dumpit calback") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221126043410.85632-1-yuancan@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 2be76a3fdd87..ca0ed7d14326 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -894,6 +894,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg, int ret = 0; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); + if (!table_attr) + return -EMSGSIZE; rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); -- cgit v1.2.3 From 67e6272d53386f9708f91c4d0015c4a1c470eef5 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Mon, 28 Nov 2022 13:52:45 +0200 Subject: RDMA/nldev: Add NULL check to silence false warnings Using nlmsg_put causes static analysis tools to many false positives of not checking the return value of nlmsg_put. In all uses in nldev.c, payload parameter is 0 so NULL will never be returned. So let's add useless checks to silence the warnings. Signed-off-by: Or Har-Toov Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/bd924da89d5b4f5291a4a01d9b5ae47c0a9b6a3f.1669636336.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 44 +++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index ca0ed7d14326..b4716cda65d8 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1043,6 +1043,10 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto err_free; + } err = fill_dev_info(msg, device); if (err) @@ -1128,7 +1132,7 @@ static int _nldev_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, NLM_F_MULTI); - if (fill_dev_info(skb, device)) { + if (!nlh || fill_dev_info(skb, device)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1187,6 +1191,10 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto err_free; + } err = fill_port_info(msg, device, port, sock_net(skb->sk)); if (err) @@ -1248,7 +1256,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, RDMA_NLDEV_CMD_PORT_GET), 0, NLM_F_MULTI); - if (fill_port_info(skb, device, p, sock_net(skb->sk))) { + if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) { nlmsg_cancel(skb, nlh); goto out; } @@ -1290,6 +1298,10 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_free; + } ret = fill_res_info(msg, device); if (ret) @@ -1321,7 +1333,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, NLM_F_MULTI); - if (fill_res_info(skb, device)) { + if (!nlh || fill_res_info(skb, device)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1456,7 +1468,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_OP(nlh->nlmsg_type)), 0, 0); - if (fill_nldev_handle(msg, device)) { + if (!nlh || fill_nldev_handle(msg, device)) { ret = -EMSGSIZE; goto err_free; } @@ -1535,7 +1547,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 0, NLM_F_MULTI); - if (fill_nldev_handle(skb, device)) { + if (!nlh || fill_nldev_handle(skb, device)) { ret = -EMSGSIZE; goto err; } @@ -1797,6 +1809,10 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET_CHARDEV), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto out_nlmsg; + } data.nl_msg = msg; err = ib_get_client_nl_info(ibdev, client_name, &data); @@ -1854,6 +1870,10 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_SYS_GET), 0, 0); + if (!nlh) { + nlmsg_free(msg); + return -EMSGSIZE; + } err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, (u8)ib_devices_shared_netns); @@ -2034,7 +2054,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_free_msg; @@ -2103,6 +2123,10 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_fill; + } cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); @@ -2173,7 +2197,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, RDMA_NLDEV_CMD_STAT_GET), 0, 0); - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_msg; @@ -2261,6 +2285,10 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_msg; + } ret = rdma_counter_get_mode(device, port, &mode, &mask); if (ret) @@ -2393,7 +2421,7 @@ static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, 0, 0); ret = -EMSGSIZE; - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) goto err_msg; -- cgit v1.2.3 From fc8f93ad3e5485d45c992233c96acd902992dfc4 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 28 Nov 2022 13:52:46 +0200 Subject: RDMA/nldev: Fix failure to send large messages Return "-EMSGSIZE" instead of "-EINVAL" when filling a QP entry, so that new SKBs will be allocated if there's not enough room in current SKB. Fixes: 65959522f806 ("RDMA: Add support to dump resource tracker in RAW format") Signed-off-by: Mark Zhang Reviewed-by: Patrisious Haddad Link: https://lore.kernel.org/r/b5e9c62f6b8369acab5648b661bf539cbceeffdc.1669636336.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index b4716cda65d8..a981ac2f0975 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -513,7 +513,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, /* In create_qp() port is not set yet */ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) - return -EINVAL; + return -EMSGSIZE; ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); if (ret) -- cgit v1.2.3 From fb4907f487254375830f135dcfe5dd7e6f8b705f Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Fri, 25 Nov 2022 09:00:26 +0800 Subject: RDMA/cma: Change RoCE packet life time from 18 to 16 The ack timeout retransmission time is affected by the following two factors: one is packet life time, another is the HCA processing time. Now the default packet lifetime(CMA_IBOE_PACKET_LIFETIME) is 18. That means the minimum ack timeout is 2 seconds (2^(18+1)*4us=2.097seconds). The packet lifetime means the maximum transmission time of packets on the network, 2 seconds is too long. Assume the network is a clos topology with three layers, every packet will pass through five hops of switches. Assume the buffer of every switch is 128MB and the port transmission rate is 25 Gbit/s, the maximum transmission time of the packet is 200ms(128MB*5/25Gbit/s). Add double redundancy, it is less than 500ms. So change the CMA_IBOE_PACKET_LIFETIME to 16, the maximum transmission time of the packet will be about 500+ms, it is long enough. Link: https://lore.kernel.org/r/20221125010026.755-1-lengchao@huawei.com Signed-off-by: Chao Leng Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index cc2222b85c88..2f5b5e6f3d11 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -47,7 +47,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) -#define CMA_IBOE_PACKET_LIFETIME 18 +#define CMA_IBOE_PACKET_LIFETIME 16 #define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP static const char * const cma_events[] = { -- cgit v1.2.3 From 8b4d379b399d19f4c803e565bfe13f07b66b5ad7 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 6 Dec 2022 21:02:00 +0800 Subject: RDMA/cm: Make QP FLUSHABLE for supported device Similar to RDMA and Atomic qp attributes enabled by default in CM, enable FLUSH attribute for supported device. That makes applications that are built with rdma_create_ep, rdma_accept APIs have FLUSH qp attribute natively so that user is able to request FLUSH operation simpler. Note that, a FLUSH operation requires FLUSH are supported by both device(HCA) and memory region(MR) and QP at the same time, so it's safe to enable FLUSH qp attribute by default here. FLUSH attribute can be disable by modify_qp() interface. Link: https://lore.kernel.org/r/20221206130201.30986-10-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1f9938a2c475..603c0aecc361 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4094,9 +4094,18 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; - if (cm_id_priv->responder_resources) + if (cm_id_priv->responder_resources) { + struct ib_device *ib_dev = cm_id_priv->id.device; + u64 support_flush = ib_dev->attrs.device_cap_flags & + (IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT); + u32 flushable = support_flush ? + (IB_ACCESS_FLUSH_GLOBAL | + IB_ACCESS_FLUSH_PERSISTENT) : 0; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_ATOMIC; + IB_ACCESS_REMOTE_ATOMIC | + flushable; + } qp_attr->pkey_index = cm_id_priv->av.pkey_index; if (cm_id_priv->av.port) qp_attr->port_num = cm_id_priv->av.port->port_num; -- cgit v1.2.3 From e42f9c2e6aad583986e91979bf2fce47aaced1c2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 9 Dec 2022 10:21:56 -0400 Subject: RDMA: Add missed netdev_put() for the netdevice_tracker The netdev core will detect if any untracked puts are done on tracked pointers and throw refcount warnings: refcount_t: decrement hit 0; leaking memory. WARNING: CPU: 1 PID: 33 at lib/refcount.c:31 refcount_warn_saturate+0x1d7/0x1f0 lib/refcount.c:31 Modules linked in: CPU: 1 PID: 33 Comm: kworker/u4:2 Not tainted 6.1.0-rc8-next-20221207-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Workqueue: ib-unreg-wq ib_unregister_work RIP: 0010:refcount_warn_saturate+0x1d7/0x1f0 lib/refcount.c:31 Code: 05 5a 60 51 0a 01 e8 35 0a b5 05 0f 0b e9 d3 fe ff ff e8 6c 9b 75 fd 48 c7 c7 c0 6d a6 8a c6 05 37 60 51 0a 01 e8 16 0a b5 05 <0f> 0b e9 b4 fe +ff ff 48 89 ef e8 5a b5 c3 fd e9 5c fe ff ff 0f 1f RSP: 0018:ffffc90000aa7b30 EFLAGS: 00010082 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8880172f9d40 RSI: ffffffff8166b1dc RDI: fffff52000154f58 RBP: ffff88807906c600 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000080000001 R11: 0000000000000000 R12: 1ffff92000154f6b R13: 0000000000000000 R14: ffff88807906c600 R15: ffff888046894000 FS: 0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe350a8ff8 CR3: 000000007a9e7000 CR4: 00000000003526e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_dec include/linux/refcount.h:344 [inline] refcount_dec include/linux/refcount.h:359 [inline] ref_tracker_free+0x539/0x6b0 lib/ref_tracker.c:118 netdev_tracker_free include/linux/netdevice.h:4039 [inline] netdev_put include/linux/netdevice.h:4056 [inline] dev_put include/linux/netdevice.h:4082 [inline] free_netdevs+0x1f8/0x470 drivers/infiniband/core/device.c:2204 __ib_unregister_device+0xa0/0x1a0 drivers/infiniband/core/device.c:1478 ib_unregister_work+0x19/0x30 drivers/infiniband/core/device.c:1586 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 So change the missed dev_put for pdata->netdev to also follow the tracker. Fixes: 09f530f0c6d6 ("RDMA: Add netdevice_tracker to ib_device_set_netdev()") Reported-by: syzbot+3fd8326d9a0812d19218@syzkaller.appspotmail.com Reported-by: syzbot+a1ed8ffe3121380cd5dd@syzkaller.appspotmail.com Reported-by: syzbot+8d0a099c8a6d1e4e601c@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-e99919867b8d+1e2-netdev_tracker2_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 9211d2794aac..894c06846224 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2201,7 +2201,7 @@ static void free_netdevs(struct ib_device *ib_dev) * comparisons after the put */ rcu_assign_pointer(pdata->netdev, NULL); - dev_put(ndev); + netdev_put(ndev, &pdata->netdev_tracker); } spin_unlock_irqrestore(&pdata->netdev_lock, flags); } -- cgit v1.2.3