From 79ebe9110fa458d58f1fceb078e2068d7ad37390 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Wed, 12 May 2021 19:43:30 +0800 Subject: nbd: Fix NULL pointer in flush_workqueue Open /dev/nbdX first, the config_refs will be 1 and the pointers in nbd_device are still null. Disconnect /dev/nbdX, then reference a null recv_workq. The protection by config_refs in nbd_genl_disconnect is useless. [ 656.366194] BUG: kernel NULL pointer dereference, address: 0000000000000020 [ 656.368943] #PF: supervisor write access in kernel mode [ 656.369844] #PF: error_code(0x0002) - not-present page [ 656.370717] PGD 10cc87067 P4D 10cc87067 PUD 1074b4067 PMD 0 [ 656.371693] Oops: 0002 [#1] SMP [ 656.372242] CPU: 5 PID: 7977 Comm: nbd-client Not tainted 5.11.0-rc5-00040-g76c057c84d28 #1 [ 656.373661] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 [ 656.375904] RIP: 0010:mutex_lock+0x29/0x60 [ 656.376627] Code: 00 0f 1f 44 00 00 55 48 89 fd 48 83 05 6f d7 fe 08 01 e8 7a c3 ff ff 48 83 05 6a d7 fe 08 01 31 c0 65 48 8b 14 25 00 6d 01 00 48 0f b1 55 d [ 656.378934] RSP: 0018:ffffc900005eb9b0 EFLAGS: 00010246 [ 656.379350] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 656.379915] RDX: ffff888104cf2600 RSI: ffffffffaae8f452 RDI: 0000000000000020 [ 656.380473] RBP: 0000000000000020 R08: 0000000000000000 R09: ffff88813bd6b318 [ 656.381039] R10: 00000000000000c7 R11: fefefefefefefeff R12: ffff888102710b40 [ 656.381599] R13: ffffc900005eb9e0 R14: ffffffffb2930680 R15: ffff88810770ef00 [ 656.382166] FS: 00007fdf117ebb40(0000) GS:ffff88813bd40000(0000) knlGS:0000000000000000 [ 656.382806] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 656.383261] CR2: 0000000000000020 CR3: 0000000100c84000 CR4: 00000000000006e0 [ 656.383819] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 656.384370] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 656.384927] Call Trace: [ 656.385111] flush_workqueue+0x92/0x6c0 [ 656.385395] nbd_disconnect_and_put+0x81/0xd0 [ 656.385716] nbd_genl_disconnect+0x125/0x2a0 [ 656.386034] genl_family_rcv_msg_doit.isra.0+0x102/0x1b0 [ 656.386422] genl_rcv_msg+0xfc/0x2b0 [ 656.386685] ? nbd_ioctl+0x490/0x490 [ 656.386954] ? genl_family_rcv_msg_doit.isra.0+0x1b0/0x1b0 [ 656.387354] netlink_rcv_skb+0x62/0x180 [ 656.387638] genl_rcv+0x34/0x60 [ 656.387874] netlink_unicast+0x26d/0x590 [ 656.388162] netlink_sendmsg+0x398/0x6c0 [ 656.388451] ? netlink_rcv_skb+0x180/0x180 [ 656.388750] ____sys_sendmsg+0x1da/0x320 [ 656.389038] ? ____sys_recvmsg+0x130/0x220 [ 656.389334] ___sys_sendmsg+0x8e/0xf0 [ 656.389605] ? ___sys_recvmsg+0xa2/0xf0 [ 656.389889] ? handle_mm_fault+0x1671/0x21d0 [ 656.390201] __sys_sendmsg+0x6d/0xe0 [ 656.390464] __x64_sys_sendmsg+0x23/0x30 [ 656.390751] do_syscall_64+0x45/0x70 [ 656.391017] entry_SYSCALL_64_after_hwframe+0x44/0xa9 To fix it, just add if (nbd->recv_workq) to nbd_disconnect_and_put(). Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Signed-off-by: Sun Ke Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210512114331.1233964-2-sunke32@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4ff71b579cfc..974da561b8e5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1980,7 +1980,8 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) * config ref and try to destroy the workqueue from inside the work * queue. */ - flush_workqueue(nbd->recv_workq); + if (nbd->recv_workq) + flush_workqueue(nbd->recv_workq); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); -- cgit v1.2.3 From bedf78c4cbbbb65e42ede5ca2bd21887ef5b7060 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Wed, 12 May 2021 19:43:31 +0800 Subject: nbd: share nbd_put and return by goto put_nbd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the following two statements by the statement “goto put_nbd;” nbd_put(nbd); return 0; Signed-off-by: Sun Ke Suggested-by: Markus Elfring Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210512114331.1233964-3-sunke32@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 974da561b8e5..45d2c28c8fc8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2015,12 +2015,11 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } mutex_unlock(&nbd_index_mutex); - if (!refcount_inc_not_zero(&nbd->config_refs)) { - nbd_put(nbd); - return 0; - } + if (!refcount_inc_not_zero(&nbd->config_refs)) + goto put_nbd; nbd_disconnect_and_put(nbd); nbd_config_put(nbd); +put_nbd: nbd_put(nbd); return 0; } -- cgit v1.2.3 From 990e78116d38059c9306cf0560c1c4ed1cf358d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Jun 2021 17:09:50 +0300 Subject: block: loop: fix deadlock between open and remove Commit c76f48eb5c08 ("block: take bd_mutex around delete_partitions in del_gendisk") adds disk->part0->bd_mutex in del_gendisk(), this way causes the following AB/BA deadlock between removing loop and opening loop: 1) loop_control_ioctl(LOOP_CTL_REMOVE) -> mutex_lock(&loop_ctl_mutex) -> del_gendisk -> mutex_lock(&disk->part0->bd_mutex) 2) blkdev_get_by_dev -> mutex_lock(&disk->part0->bd_mutex) -> lo_open -> mutex_lock(&loop_ctl_mutex) Add a new Lo_deleting state to remove the need for clearing ->private_data and thus holding loop_ctl_mutex in the ioctl LOOP_CTL_REMOVE path. Based on an analysis and earlier patch from Ming Lei . Reported-by: Colin Ian King Fixes: c76f48eb5c08 ("block: take bd_mutex around delete_partitions in del_gendisk") Signed-off-by: Christoph Hellwig Tested-by: Colin Ian King Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210605140950.5800-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 25 +++++++------------------ drivers/block/loop.h | 1 + 2 files changed, 8 insertions(+), 18 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a370cde3ddd4..f280a96d4de2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1878,29 +1878,18 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { - struct loop_device *lo; + struct loop_device *lo = bdev->bd_disk->private_data; int err; - /* - * take loop_ctl_mutex to protect lo pointer from race with - * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention - * release it prior to updating lo->lo_refcnt. - */ - err = mutex_lock_killable(&loop_ctl_mutex); - if (err) - return err; - lo = bdev->bd_disk->private_data; - if (!lo) { - mutex_unlock(&loop_ctl_mutex); - return -ENXIO; - } err = mutex_lock_killable(&lo->lo_mutex); - mutex_unlock(&loop_ctl_mutex); if (err) return err; - atomic_inc(&lo->lo_refcnt); + if (lo->lo_state == Lo_deleting) + err = -ENXIO; + else + atomic_inc(&lo->lo_refcnt); mutex_unlock(&lo->lo_mutex); - return 0; + return err; } static void lo_release(struct gendisk *disk, fmode_t mode) @@ -2284,7 +2273,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, mutex_unlock(&lo->lo_mutex); break; } - lo->lo_disk->private_data = NULL; + lo->lo_state = Lo_deleting; mutex_unlock(&lo->lo_mutex); idr_remove(&loop_index_idr, lo->lo_number); loop_remove(lo); diff --git a/drivers/block/loop.h b/drivers/block/loop.h index a3c04f310672..5beb959b94d3 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -22,6 +22,7 @@ enum { Lo_unbound, Lo_bound, Lo_rundown, + Lo_deleting, }; struct loop_func_table; -- cgit v1.2.3