diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 109 |
1 files changed, 93 insertions, 16 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d6f7978b4449..b98e746e7fc4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -25,6 +25,7 @@ #include <linux/seq_file.h> #include <linux/ratelimit.h> #include <linux/kthread.h> +#include <linux/raid/md_p.h> #include <trace/events/block.h> #include "md.h" #include "raid10.h" @@ -1808,6 +1809,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) first = last = rdev->raid_disk; if (rdev->saved_raid_disk >= first && + rdev->saved_raid_disk < conf->geo.raid_disks && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) mirror = rdev->saved_raid_disk; else @@ -3079,6 +3081,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, sector_t sect; int must_sync; int any_working; + int need_recover = 0; + int need_replace = 0; struct raid10_info *mirror = &conf->mirrors[i]; struct md_rdev *mrdev, *mreplace; @@ -3086,11 +3090,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, mrdev = rcu_dereference(mirror->rdev); mreplace = rcu_dereference(mirror->replacement); - if ((mrdev == NULL || - test_bit(Faulty, &mrdev->flags) || - test_bit(In_sync, &mrdev->flags)) && - (mreplace == NULL || - test_bit(Faulty, &mreplace->flags))) { + if (mrdev != NULL && + !test_bit(Faulty, &mrdev->flags) && + !test_bit(In_sync, &mrdev->flags)) + need_recover = 1; + if (mreplace != NULL && + !test_bit(Faulty, &mreplace->flags)) + need_replace = 1; + + if (!need_recover && !need_replace) { rcu_read_unlock(); continue; } @@ -3213,7 +3221,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->devs[1].devnum = i; r10_bio->devs[1].addr = to_addr; - if (!test_bit(In_sync, &mrdev->flags)) { + if (need_recover) { bio = r10_bio->devs[1].bio; bio->bi_next = biolist; biolist = bio; @@ -3230,16 +3238,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r10_bio->devs[1].repl_bio; if (bio) bio->bi_end_io = NULL; - /* Note: if mreplace != NULL, then bio + /* Note: if need_replace, then bio * cannot be NULL as r10buf_pool_alloc will * have allocated it. - * So the second test here is pointless. - * But it keeps semantic-checkers happy, and - * this comment keeps human reviewers - * happy. */ - if (mreplace == NULL || bio == NULL || - test_bit(Faulty, &mreplace->flags)) + if (!need_replace) break; bio->bi_next = biolist; biolist = bio; @@ -4286,12 +4289,46 @@ static int raid10_start_reshape(struct mddev *mddev) spin_unlock_irq(&conf->device_lock); if (mddev->delta_disks && mddev->bitmap) { - ret = md_bitmap_resize(mddev->bitmap, - raid10_size(mddev, 0, conf->geo.raid_disks), - 0, 0); + struct mdp_superblock_1 *sb = NULL; + sector_t oldsize, newsize; + + oldsize = raid10_size(mddev, 0, 0); + newsize = raid10_size(mddev, 0, conf->geo.raid_disks); + + if (!mddev_is_clustered(mddev)) { + ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); + if (ret) + goto abort; + else + goto out; + } + + rdev_for_each(rdev, mddev) { + if (rdev->raid_disk > -1 && + !test_bit(Faulty, &rdev->flags)) + sb = page_address(rdev->sb_page); + } + + /* + * some node is already performing reshape, and no need to + * call md_bitmap_resize again since it should be called when + * receiving BITMAP_RESIZE msg + */ + if ((sb && (le32_to_cpu(sb->feature_map) & + MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize)) + goto out; + + ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0); if (ret) goto abort; + + ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize); + if (ret) { + md_bitmap_resize(mddev->bitmap, oldsize, 0, 0); + goto abort; + } } +out: if (mddev->delta_disks > 0) { rdev_for_each(rdev, mddev) if (rdev->raid_disk < 0 && @@ -4568,6 +4605,32 @@ read_more: r10_bio->master_bio = read_bio; r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum; + /* + * Broadcast RESYNC message to other nodes, so all nodes would not + * write to the region to avoid conflict. + */ + if (mddev_is_clustered(mddev) && conf->cluster_sync_high <= sector_nr) { + struct mdp_superblock_1 *sb = NULL; + int sb_reshape_pos = 0; + + conf->cluster_sync_low = sector_nr; + conf->cluster_sync_high = sector_nr + CLUSTER_RESYNC_WINDOW_SECTORS; + sb = page_address(rdev->sb_page); + if (sb) { + sb_reshape_pos = le64_to_cpu(sb->reshape_position); + /* + * Set cluster_sync_low again if next address for array + * reshape is less than cluster_sync_low. Since we can't + * update cluster_sync_low until it has finished reshape. + */ + if (sb_reshape_pos < conf->cluster_sync_low) + conf->cluster_sync_low = sb_reshape_pos; + } + + md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low, + conf->cluster_sync_high); + } + /* Now find the locations in the new layout */ __raid10_find_phys(&conf->geo, r10_bio); @@ -4719,6 +4782,19 @@ static void end_reshape(struct r10conf *conf) conf->fullsync = 0; } +static void raid10_update_reshape_pos(struct mddev *mddev) +{ + struct r10conf *conf = mddev->private; + sector_t lo, hi; + + md_cluster_ops->resync_info_get(mddev, &lo, &hi); + if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo)) + || mddev->reshape_position == MaxSector) + conf->reshape_progress = mddev->reshape_position; + else + WARN_ON_ONCE(1); +} + static int handle_reshape_read_error(struct mddev *mddev, struct r10bio *r10_bio) { @@ -4887,6 +4963,7 @@ static struct md_personality raid10_personality = .check_reshape = raid10_check_reshape, .start_reshape = raid10_start_reshape, .finish_reshape = raid10_finish_reshape, + .update_reshape_pos = raid10_update_reshape_pos, .congested = raid10_congested, }; |