From cc27b0c78c79680d128dbac79de0d40556d041bb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Jun 2017 16:49:39 +1000 Subject: md: fix deadlock between mddev_suspend() and md_write_start() If mddev_suspend() races with md_write_start() we can deadlock with mddev_suspend() waiting for the request that is currently in md_write_start() to complete the ->make_request() call, and md_write_start() waiting for the metadata to be updated to mark the array as 'dirty'. As metadata updates done by md_check_recovery() only happen then the mddev_lock() can be claimed, and as mddev_suspend() is often called with the lock held, these threads wait indefinitely for each other. We fix this by having md_write_start() abort if mddev_suspend() is happening, and ->make_request() aborts if md_write_start() aborted. md_make_request() can detect this abort, decrease the ->active_io count, and wait for mddev_suspend(). Reported-by: Nix Fix: 68866e425be2(MD: no sync IO while suspended) Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e1a7e3d4c5e4..c71739b87ab7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1321,7 +1321,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, * Continue immediately if no resync is active currently. */ - md_write_start(mddev, bio); /* wait on superblock update early */ if ((bio_end_sector(bio) > mddev->suspend_lo && bio->bi_iter.bi_sector < mddev->suspend_hi) || @@ -1550,13 +1549,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, wake_up(&conf->wait_barrier); } -static void raid1_make_request(struct mddev *mddev, struct bio *bio) +static bool raid1_make_request(struct mddev *mddev, struct bio *bio) { sector_t sectors; if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { md_flush_request(mddev, bio); - return; + return true; } /* @@ -1571,8 +1570,12 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio) if (bio_data_dir(bio) == READ) raid1_read_request(mddev, bio, sectors, NULL); - else + else { + if (!md_write_start(mddev,bio)) + return false; raid1_write_request(mddev, bio, sectors); + } + return true; } static void raid1_status(struct seq_file *seq, struct mddev *mddev) -- cgit v1.2.3 From f9c79bc05a2a91f4fba8bfd653579e066714b1ec Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 7 Jun 2017 19:05:31 -0400 Subject: md: don't use flush_signals in userspace processes The function flush_signals clears all pending signals for the process. It may be used by kernel threads when we need to prepare a kernel thread for responding to signals. However using this function for an userspaces processes is incorrect - clearing signals without the program expecting it can cause misbehavior. The raid1 and raid5 code uses flush_signals in its request routine because it wants to prepare for an interruptible wait. This patch drops flush_signals and uses sigprocmask instead to block all signals (including SIGKILL) around the schedule() call. The signals are not lost, but the schedule() call won't respond to them. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Acked-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 5 ++++- drivers/md/raid5.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c71739b87ab7..7866563338fa 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1334,7 +1334,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, */ DEFINE_WAIT(w); for (;;) { - flush_signals(current); + sigset_t full, old; prepare_to_wait(&conf->wait_barrier, &w, TASK_INTERRUPTIBLE); if (bio_end_sector(bio) <= mddev->suspend_lo || @@ -1344,7 +1344,10 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, bio->bi_iter.bi_sector, bio_end_sector(bio)))) break; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); } finish_wait(&conf->wait_barrier, &w); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b218a42fd702..547d5fa45a42 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5693,12 +5693,15 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) * userspace, we want an interruptible * wait. */ - flush_signals(current); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_INTERRUPTIBLE); if (logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { + sigset_t full, old; + sigfillset(&full); + sigprocmask(SIG_BLOCK, &full, &old); schedule(); + sigprocmask(SIG_SETMASK, &old, NULL); do_prepare = true; } goto retry; -- cgit v1.2.3 From 037d2ff62cbfd3adf634553fbc25c9af7c25a702 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 15 Jun 2017 17:00:25 +0800 Subject: md/raid1: remove unused bio in sync_request_write The "bio" is not used in sync_request_write after commit a68e58703575 ("md/raid1: split out two sub-functions from sync_request_write"). Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/md/raid1.c') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7866563338fa..659e1ab922b1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2171,9 +2171,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) struct r1conf *conf = mddev->private; int i; int disks = conf->raid_disks * 2; - struct bio *bio, *wbio; - - bio = r1_bio->bios[r1_bio->read_disk]; + struct bio *wbio; if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) /* ouch - failed to read all of that. */ -- cgit v1.2.3