From dbc153fd3c142909e564bb256da087e13fbf239c Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 18 Jan 2024 12:32:10 +0800 Subject: net/smc: fix illegal rmb_desc access in SMC-D connection dump A crash was found when dumping SMC-D connections. It can be reproduced by following steps: - run nginx/wrk test: smc_run nginx smc_run wrk -t 16 -c 1000 -d -H 'Connection: Close' - continuously dump SMC-D connections in parallel: watch -n 1 'smcss -D' BUG: kernel NULL pointer dereference, address: 0000000000000030 CPU: 2 PID: 7204 Comm: smcss Kdump: loaded Tainted: G E 6.7.0+ #55 RIP: 0010:__smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x66/0x150 ? exc_page_fault+0x69/0x140 ? asm_exc_page_fault+0x26/0x30 ? __smc_diag_dump.constprop.0+0x5e5/0x620 [smc_diag] ? __kmalloc_node_track_caller+0x35d/0x430 ? __alloc_skb+0x77/0x170 smc_diag_dump_proto+0xd0/0xf0 [smc_diag] smc_diag_dump+0x26/0x60 [smc_diag] netlink_dump+0x19f/0x320 __netlink_dump_start+0x1dc/0x300 smc_diag_handler_dump+0x6a/0x80 [smc_diag] ? __pfx_smc_diag_dump+0x10/0x10 [smc_diag] sock_diag_rcv_msg+0x121/0x140 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x5a/0x110 sock_diag_rcv+0x28/0x40 netlink_unicast+0x22a/0x330 netlink_sendmsg+0x1f8/0x420 __sock_sendmsg+0xb0/0xc0 ____sys_sendmsg+0x24e/0x300 ? copy_msghdr_from_user+0x62/0x80 ___sys_sendmsg+0x7c/0xd0 ? __do_fault+0x34/0x160 ? do_read_fault+0x5f/0x100 ? do_fault+0xb0/0x110 ? __handle_mm_fault+0x2b0/0x6c0 __sys_sendmsg+0x4d/0x80 do_syscall_64+0x69/0x180 entry_SYSCALL_64_after_hwframe+0x6e/0x76 It is possible that the connection is in process of being established when we dump it. Assumed that the connection has been registered in a link group by smc_conn_create() but the rmb_desc has not yet been initialized by smc_buf_create(), thus causing the illegal access to conn->rmb_desc. So fix it by checking before dump. Fixes: 4b1b7d3b30a6 ("net/smc: add SMC-D diag support") Signed-off-by: Wen Gu Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/smc_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/smc') diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 52f7c4f1e767..5a33908015f3 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; -- cgit v1.2.3 From c3dfcdb65ec1a4813ec1e0871c52c671ba9c71ac Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 25 Jan 2024 20:39:16 +0800 Subject: net/smc: fix incorrect SMC-D link group matching logic The logic to determine if SMC-D link group matches is incorrect. The correct logic should be that it only returns true when the GID is the same, and the SMC-D device is the same and the extended GID is the same (in the case of virtual ISM). It can be fixed by adding brackets around the conditional (or ternary) operator expression. But for better readability and maintainability, it has been changed to an if-else statement. Reported-by: Matthew Rosato Closes: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Fixes: b40584d14570 ("net/smc: compatible with 128-bits extended GID of virtual ISM device") Link: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Signed-off-by: Wen Gu Reviewed-by: Alexandra Winter Link: https://lore.kernel.org/r/20240125123916.77928-1-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net/smc') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 95cc95458e2d..e4c858411207 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, struct smcd_dev *smcismdev, struct smcd_gid *peer_gid) { - return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev && - smc_ism_is_virtual(smcismdev) ? - (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1; + if (lgr->peer_gid.gid != peer_gid->gid || + lgr->smcd != smcismdev) + return false; + + if (smc_ism_is_virtual(smcismdev) && + lgr->peer_gid.gid_ext != peer_gid->gid_ext) + return false; + + return true; } /* create a new SMC connection (and a new link group if necessary) */ -- cgit v1.2.3 From 6cf9ff463317217d95732a6cce6fbdd12508921a Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 12 Feb 2024 17:34:02 +0300 Subject: net: smc: fix spurious error message from __sock_release() Commit 67f562e3e147 ("net/smc: transfer fasync_list in case of fallback") leaves the socket's fasync list pointer within a container socket as well. When the latter is destroyed, '__sock_release()' warns about its non-empty fasync list, which is a dangling pointer to previously freed fasync list of an underlying TCP socket. Fix this spurious warning by nullifying fasync list of a container socket. Fixes: 67f562e3e147 ("net/smc: transfer fasync_list in case of fallback") Signed-off-by: Dmitry Antipov Signed-off-by: David S. Miller --- net/smc/af_smc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/smc') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a2cb30af46cb..0f53a5c6fd9d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -924,6 +924,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) smc->clcsock->file->private_data = smc->clcsock; smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; + smc->sk.sk_socket->wq.fasync_list = NULL; /* There might be some wait entries remaining * in smc sk->sk_wq and they should be woken up -- cgit v1.2.3