From 9a3f530f39f4490eaa18b02719fb74ce5f4d2d86 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 8 Nov 2011 16:22:01 +1100 Subject: md/raid5: abort any pending parity operations when array fails. When the number of failed devices exceeds the allowed number we must abort any active parity operations (checks or updates) as they are no longer meaningful, and can lead to a BUG_ON in handle_parity_checks6. This bug was introduce by commit 6c0069c0ae9659e3a91b68eaed06a5c6c37f45c8 in 2.6.29. Reported-by: Manish Katiyar Tested-by: Manish Katiyar Acked-by: Dan Williams Signed-off-by: NeilBrown Cc: stable@kernel.org --- drivers/md/raid5.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 472aedfb07cf..318bdae9b56c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3159,10 +3159,14 @@ static void handle_stripe(struct stripe_head *sh) /* check if the array has lost more than max_degraded devices and, * if so, some requests might need to be failed. */ - if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written) - handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); - if (s.failed > conf->max_degraded && s.syncing) - handle_failed_sync(conf, sh, &s); + if (s.failed > conf->max_degraded) { + sh->check_state = 0; + sh->reconstruct_state = 0; + if (s.to_read+s.to_write+s.written) + handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); + if (s.syncing) + handle_failed_sync(conf, sh, &s); + } /* * might be able to return some write requests if the parity blocks -- cgit v1.2.3 From 257a4b42af7586fab4eaec7f04e6896b86551843 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Nov 2011 16:22:06 +1100 Subject: md/raid5: STRIPE_ACTIVE has lock semantics, add barriers All updates that occur under STRIPE_ACTIVE should be globally visible when STRIPE_ACTIVE clears. test_and_set_bit() implies a barrier, but clear_bit() does not. This is suitable for 3.1-stable. Signed-off-by: Dan Williams Signed-off-by: NeilBrown Cc: stable@kernel.org --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 318bdae9b56c..297e26092178 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3110,7 +3110,7 @@ static void handle_stripe(struct stripe_head *sh) struct r5dev *pdev, *qdev; clear_bit(STRIPE_HANDLE, &sh->state); - if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) { + if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) { /* already being handled, ensure it gets handled * again when current action finishes */ set_bit(STRIPE_HANDLE, &sh->state); @@ -3375,7 +3375,7 @@ finish: return_io(s.return_bi); - clear_bit(STRIPE_ACTIVE, &sh->state); + clear_bit_unlock(STRIPE_ACTIVE, &sh->state); } static void raid5_activate_delayed(struct r5conf *conf) -- cgit v1.2.3