diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 69 |
1 files changed, 54 insertions, 15 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2d2aaf6e98e2..2ae63c5b1c25 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -524,7 +524,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_end_io = raid5_end_read_request; rcu_read_lock(); - rdev = rcu_dereference(conf->disks[i].rdev); + if (rw == READ && + test_bit(R5_ReadRepl, &sh->dev[i].flags)) + rdev = rcu_dereference(conf->disks[i].replacement); + else + rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && test_bit(Faulty, &rdev->flags)) rdev = NULL; if (rdev) @@ -1605,11 +1609,18 @@ static void raid5_end_read_request(struct bio * bi, int error) BUG(); return; } + if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) + rdev = conf->disks[i].replacement; + else + rdev = conf->disks[i].rdev; if (uptodate) { set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - rdev = conf->disks[i].rdev; + /* Note that this cannot happen on a + * replacement device. We just fail those on + * any error + */ printk_ratelimited( KERN_INFO "md/raid:%s: read error corrected" @@ -1622,16 +1633,24 @@ static void raid5_end_read_request(struct bio * bi, int error) clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } - if (atomic_read(&conf->disks[i].rdev->read_errors)) - atomic_set(&conf->disks[i].rdev->read_errors, 0); + if (atomic_read(&rdev->read_errors)) + atomic_set(&rdev->read_errors, 0); } else { - const char *bdn = bdevname(conf->disks[i].rdev->bdev, b); + const char *bdn = bdevname(rdev->bdev, b); int retry = 0; - rdev = conf->disks[i].rdev; clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); - if (conf->mddev->degraded >= conf->max_degraded) + if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) + printk_ratelimited( + KERN_WARNING + "md/raid:%s: read error on replacement device " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); + else if (conf->mddev->degraded >= conf->max_degraded) printk_ratelimited( KERN_WARNING "md/raid:%s: read error not correctable " @@ -1665,7 +1684,7 @@ static void raid5_end_read_request(struct bio * bi, int error) md_error(conf->mddev, rdev); } } - rdev_dec_pending(conf->disks[i].rdev, conf->mddev); + rdev_dec_pending(rdev, conf->mddev); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); @@ -3036,7 +3055,19 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) } if (dev->written) s->written++; - rdev = rcu_dereference(conf->disks[i].rdev); + /* Prefer to use the replacement for reads, but only + * if it is recovered enough and has no bad blocks. + */ + rdev = rcu_dereference(conf->disks[i].replacement); + if (rdev && !test_bit(Faulty, &rdev->flags) && + rdev->recovery_offset >= sh->sector + STRIPE_SECTORS && + !is_badblock(rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors)) + set_bit(R5_ReadRepl, &dev->flags); + else { + rdev = rcu_dereference(conf->disks[i].rdev); + clear_bit(R5_ReadRepl, &dev->flags); + } if (rdev && test_bit(Faulty, &rdev->flags)) rdev = NULL; if (rdev) { @@ -3078,17 +3109,26 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) set_bit(R5_Insync, &dev->flags); if (rdev && test_bit(R5_WriteError, &dev->flags)) { - clear_bit(R5_Insync, &dev->flags); - if (!test_bit(Faulty, &rdev->flags)) { + /* This flag does not apply to '.replacement' + * only to .rdev, so make sure to check that*/ + struct md_rdev *rdev2 = rcu_dereference( + conf->disks[i].rdev); + if (rdev2 == rdev) + clear_bit(R5_Insync, &dev->flags); + if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { s->handle_bad_blocks = 1; - atomic_inc(&rdev->nr_pending); + atomic_inc(&rdev2->nr_pending); } else clear_bit(R5_WriteError, &dev->flags); } if (rdev && test_bit(R5_MadeGood, &dev->flags)) { - if (!test_bit(Faulty, &rdev->flags)) { + /* This flag does not apply to '.replacement' + * only to .rdev, so make sure to check that*/ + struct md_rdev *rdev2 = rcu_dereference( + conf->disks[i].rdev); + if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { s->handle_bad_blocks = 1; - atomic_inc(&rdev->nr_pending); + atomic_inc(&rdev2->nr_pending); } else clear_bit(R5_MadeGood, &dev->flags); } @@ -4220,7 +4260,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) return handled; } - set_bit(R5_ReadError, &sh->dev[dd_idx].flags); if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { release_stripe(sh); raid5_set_bi_hw_segments(raid_bio, scnt); |