Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--	drivers/md/raid10.c | 112
1 file changed, 71 insertions(+), 41 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 59d4daa5f4c7..cd066b63bdaf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
 
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
-static int enough(struct r10conf *conf, int ignore);
+static int _enough(struct r10conf *conf, int previous, int ignore);
 static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 				int *skipped);
 static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
 		 * than fail the last device.  Here we redefine
 		 * "uptodate" to mean "Don't want to retry"
 		 */
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
-		if (!enough(conf, rdev->raid_disk))
+		if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
+			     rdev->raid_disk))
 			uptodate = 1;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 	if (uptodate) {
 		raid_end_bio_io(r10_bio);
@@ -490,7 +488,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
 		sector_t first_bad;
 		int bad_sectors;
 
-		set_bit(R10BIO_Uptodate, &r10_bio->state);
+		/*
+		 * Do not set R10BIO_Uptodate if the current device is
+		 * rebuilding or Faulty. This is because we cannot use
+		 * such a device for properly reading the data back (we could
+		 * potentially use it, if the current write would have fallen
+		 * before rdev->recovery_offset, but for simplicity we don't
+		 * check this here).
+		 */
+		if (test_bit(In_sync, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags))
+			set_bit(R10BIO_Uptodate, &r10_bio->state);
 
 		/* Maybe we can clear some bad blocks. */
 		if (is_badblock(rdev,
@@ -1055,17 +1063,17 @@ static void allow_barrier(struct r10conf *conf)
 	wake_up(&conf->wait_barrier);
 }
 
-static void freeze_array(struct r10conf *conf)
+static void freeze_array(struct r10conf *conf, int extra)
 {
 	/* stop syncio and normal IO and wait for everything to
 	 * go quiet.
 	 * We increment barrier and nr_waiting, and then
-	 * wait until nr_pending match nr_queued+1
+	 * wait until nr_pending match nr_queued+extra
 	 * This is called in the context of one normal IO request
 	 * that has failed. Thus any sync request that might be pending
 	 * will be blocked by nr_pending, and we need to wait for
 	 * pending IO requests to complete or be queued for re-try.
-	 * Thus the number queued (nr_queued) plus this request (1)
+	 * Thus the number queued (nr_queued) plus this request (extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
@@ -1073,7 +1081,7 @@ static void freeze_array(struct r10conf *conf)
 	conf->barrier++;
 	conf->nr_waiting++;
 	wait_event_lock_irq_cmd(conf->wait_barrier,
-				conf->nr_pending == conf->nr_queued+1,
+				conf->nr_pending == conf->nr_queued+extra,
 				conf->resync_lock,
 				flush_pending_writes(conf));
 
@@ -1622,37 +1630,58 @@ static void status(struct seq_file *seq, struct mddev *mddev)
  * Don't consider the device numbered 'ignore'
  * as we might be about to remove it.
  */
-static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
+static int _enough(struct r10conf *conf, int previous, int ignore)
 {
 	int first = 0;
+	int has_enough = 0;
+	int disks, ncopies;
+	if (previous) {
+		disks = conf->prev.raid_disks;
+		ncopies = conf->prev.near_copies;
+	} else {
+		disks = conf->geo.raid_disks;
+		ncopies = conf->geo.near_copies;
+	}
 
+	rcu_read_lock();
 	do {
 		int n = conf->copies;
 		int cnt = 0;
 		int this = first;
 		while (n--) {
-			if (conf->mirrors[this].rdev &&
-			    this != ignore)
+			struct md_rdev *rdev;
+			if (this != ignore &&
+			    (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
+			    test_bit(In_sync, &rdev->flags))
 				cnt++;
-			this = (this+1) % geo->raid_disks;
+			this = (this+1) % disks;
 		}
 		if (cnt == 0)
-			return 0;
-		first = (first + geo->near_copies) % geo->raid_disks;
+			goto out;
+		first = (first + ncopies) % disks;
 	} while (first != 0);
-	return 1;
+	has_enough = 1;
+out:
+	rcu_read_unlock();
+	return has_enough;
 }
 
 static int enough(struct r10conf *conf, int ignore)
 {
-	return _enough(conf, &conf->geo, ignore) &&
-		_enough(conf, &conf->prev, ignore);
+	/* when calling 'enough', both 'prev' and 'geo' must
+	 * be stable.
+	 * This is ensured if ->reconfig_mutex or ->device_lock
+	 * is held.
+	 */
+	return _enough(conf, 0, ignore) &&
+		_enough(conf, 1, ignore);
 }
 
 static void error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r10conf *conf = mddev->private;
+	unsigned long flags;
 
 	/*
 	 * If it is not operational, then we have already marked it as dead
@@ -1660,18 +1689,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 	 * next level up know.
 	 * else mark the drive as failed
 	 */
+	spin_lock_irqsave(&conf->device_lock, flags);
 	if (test_bit(In_sync, &rdev->flags)
-	    && !enough(conf, rdev->raid_disk))
+	    && !enough(conf, rdev->raid_disk)) {
 		/*
 		 * Don't fail the drive, just return an IO error.
 		 */
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 		return;
+	}
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded++;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		/*
+		/*
 		 * if recovery is running, make sure it aborts.
 		 */
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -1679,6 +1708,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 	printk(KERN_ALERT
 	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid10:%s: Operation continuing on %d devices.\n",
@@ -1781,7 +1811,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * very different from resync
 		 */
 		return -EBUSY;
-	if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
+	if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
 		return -EINVAL;
 
 	if (rdev->raid_disk >= 0)
@@ -1809,15 +1839,17 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			set_bit(Replacement, &rdev->flags);
 			rdev->raid_disk = mirror;
 			err = 0;
-			disk_stack_limits(mddev->gendisk, rdev->bdev,
-					  rdev->data_offset << 9);
+			if (mddev->gendisk)
+				disk_stack_limits(mddev->gendisk, rdev->bdev,
+						  rdev->data_offset << 9);
 			conf->fullsync = 1;
 			rcu_assign_pointer(p->replacement, rdev);
 			break;
 		}
 
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
+		if (mddev->gendisk)
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->data_offset << 9);
 
 		p->head_position = 0;
 		p->recovery_disabled = mddev->recovery_disabled - 1;
@@ -1837,8 +1869,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * we wait for all outstanding requests to complete.
 		 */
 		synchronize_sched();
-		raise_barrier(conf, 0);
-		lower_barrier(conf);
+		freeze_array(conf, 0);
+		unfreeze_array(conf);
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
@@ -2612,7 +2644,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
 	r10_bio->devs[slot].bio = NULL;
 
 	if (mddev->ro == 0) {
-		freeze_array(conf);
+		freeze_array(conf, 1);
 		fix_read_error(conf, mddev, r10_bio);
 		unfreeze_array(conf);
 	} else
@@ -2899,14 +2931,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 	 */
 	if (mddev->bitmap == NULL &&
 	    mddev->recovery_cp == MaxSector &&
+	    mddev->reshape_position == MaxSector &&
+	    !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+	    !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    conf->fullsync == 0) {
 		*skipped = 1;
-		max_sector = mddev->dev_sectors;
-		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
-		    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-			max_sector = mddev->resync_max_sectors;
-		return max_sector - sector_nr;
+		return mddev->dev_sectors - sector_nr;
 	}
 
  skipped:
@@ -3522,7 +3553,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	/* FIXME calc properly */
 	conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
-							    max(0,mddev->delta_disks)),
+							    max(0,-mddev->delta_disks)),
 				GFP_KERNEL);
 	if (!conf->mirrors)
 		goto out;
 
@@ -3609,8 +3640,7 @@ static int run(struct mddev *mddev)
 	if (mddev->queue) {
 		blk_queue_max_discard_sectors(mddev->queue,
 					      mddev->chunk_sectors);
-		blk_queue_max_write_same_sectors(mddev->queue,
-						 mddev->chunk_sectors);
+		blk_queue_max_write_same_sectors(mddev->queue, 0);
 		blk_queue_io_min(mddev->queue, chunk_size);
 		if (conf->geo.raid_disks % conf->geo.near_copies)
 			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3682,7 +3712,7 @@ static int run(struct mddev *mddev)
 		    conf->geo.far_offset == 0)
 			goto out_free_conf;
 		if (conf->prev.far_copies != 1 &&
-		    conf->geo.far_offset == 0)
+		    conf->prev.far_offset == 0)
 			goto out_free_conf;
 	}
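The write-completion change encodes a simple read-back rule: an acknowledgement from a device that is rebuilding (not yet In_sync) or already Faulty must not mark the r10_bio uptodate, because that device cannot be trusted to return the data on a later read. A minimal restatement of the rule, with the hypothetical dev_state_sketch type standing in for the rdev flag bits:

/*
 * Illustrative sketch only; "dev_state_sketch" is not a kernel type.
 * It captures the condition guarding set_bit(R10BIO_Uptodate, ...).
 */
struct dev_state_sketch {
	int in_sync;	/* device holds a complete, current copy */
	int faulty;	/* device has been failed */
};

static int write_counts_as_uptodate(const struct dev_state_sketch *d)
{
	/* only a healthy, fully synced member can serve the data back */
	return d->in_sync && !d->faulty;
}

As the patch comment notes, a write landing below rdev->recovery_offset would in principle be readable from a rebuilding device, but the check deliberately stays simple.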
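freeze_array() now takes an 'extra' argument saying how many requests the caller itself still holds: the barrier is complete once every pending request is either queued for retry or one of the caller's own. A minimal user-space sketch of that accounting, assuming a trimmed-down conf_sketch structure with pthread primitives in place of the kernel's resync_lock and wait_barrier, and omitting the flush_pending_writes() command the kernel runs while waiting:

#include <pthread.h>

/* "conf_sketch" is a hypothetical stand-in, not struct r10conf. */
struct conf_sketch {
	pthread_mutex_t resync_lock;
	pthread_cond_t wait_barrier;
	int barrier;
	int nr_waiting;
	int nr_pending;
	int nr_queued;
};

static void freeze_array_sketch(struct conf_sketch *conf, int extra)
{
	pthread_mutex_lock(&conf->resync_lock);
	conf->barrier++;
	conf->nr_waiting++;
	/* wait until all pending I/O is either queued for retry or
	 * one of the 'extra' requests the caller itself holds */
	while (conf->nr_pending != conf->nr_queued + extra)
		pthread_cond_wait(&conf->wait_barrier, &conf->resync_lock);
	pthread_mutex_unlock(&conf->resync_lock);
}

static void unfreeze_array_sketch(struct conf_sketch *conf)
{
	pthread_mutex_lock(&conf->resync_lock);
	conf->barrier--;
	conf->nr_waiting--;
	pthread_cond_broadcast(&conf->wait_barrier);
	pthread_mutex_unlock(&conf->resync_lock);
}

This is why handle_read_error() passes 1 (the failed request it is handling still counts in nr_pending), while raid10_add_disk() passes 0 when it quiesces the array to clear Unmerged.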
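The heart of the patch is the reworked _enough(): a layout is only "enough" if every group of near copies still contains at least one In_sync device, where the old code settled for any present rdev. The following user-space sketch of that walk is illustrative only; geo_sketch and enough_sketch are hypothetical names, and a plain in_sync[] array replaces the RCU-protected rdev pointers of the real code:

/* Hypothetical simplified geometry for illustration. */
struct geo_sketch {
	int raid_disks;
	int near_copies;
	int copies;
};

static int enough_sketch(const struct geo_sketch *g,
			 const int *in_sync, int ignore)
{
	int first = 0;

	do {
		int n = g->copies;
		int cnt = 0;
		int this = first;

		/* every set of 'copies' adjacent slots must contribute
		 * at least one usable (In_sync) member */
		while (n--) {
			if (this != ignore && in_sync[this])
				cnt++;
			this = (this + 1) % g->raid_disks;
		}
		if (cnt == 0)
			return 0;	/* a whole copy set is gone */
		first = (first + g->near_copies) % g->raid_disks;
	} while (first != 0);

	return 1;
}

If any whole set has no usable member, some region of the array has lost all of its replicas, so the walk reports failure for the array as a whole.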
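In error(), the "last usable disk" test and the Faulty transition now form one critical section under device_lock instead of an unlocked enough() call. A toy illustration of why the check and the update must be atomic, using the hypothetical helpers enough_toy() and fail_device() over a flat in_sync[] array: without the lock spanning both steps, two concurrently failing devices could each observe the other as still in sync and both be failed, taking the array down.

#include <pthread.h>

static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;
static int in_sync[4] = { 1, 1, 1, 1 };

static int enough_toy(int ignore)
{
	int cnt = 0;
	for (int i = 0; i < 4; i++)
		if (i != ignore && in_sync[i])
			cnt++;
	return cnt > 0;
}

static int fail_device(int disk)
{
	pthread_mutex_lock(&device_lock);
	if (in_sync[disk] && !enough_toy(disk)) {
		/* last usable copy: refuse, report an I/O error instead */
		pthread_mutex_unlock(&device_lock);
		return -1;
	}
	in_sync[disk] = 0;	/* mark Faulty under the same lock */
	pthread_mutex_unlock(&device_lock);
	return 0;
}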
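The setup_conf() allocation fix matters for shrinking reshapes: mddev->raid_disks holds the new disk count and delta_disks the signed change, so the old geometry is the larger one exactly when delta_disks is negative, and the mirrors[] array must have slots for it. A sketch of the corrected sizing, with the hypothetical helper mirrors_needed():

#include <stddef.h>

static inline int max_int(int a, int b) { return a > b ? a : b; }

/* Slots needed to cover both geometries: the new one (raid_disks)
 * and the old one (raid_disks - delta_disks). */
static size_t mirrors_needed(int raid_disks, int delta_disks)
{
	return (size_t)(raid_disks + max_int(0, -delta_disks));
}

The original max(0, delta_disks) over-allocated on a grow and, worse, under-allocated on a shrink, leaving the old geometry's higher-numbered slots out of bounds.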