diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 236 |
1 files changed, 146 insertions, 90 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 08fcaebc61bd..4e4dee0ec2de 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -69,7 +69,7 @@ #include <trace/events/block.h> #include "md.h" -#include "bitmap.h" +#include "md-bitmap.h" #include "md-cluster.h" #ifndef MODULE @@ -266,6 +266,52 @@ static DEFINE_SPINLOCK(all_mddevs_lock); * call has finished, the bio has been linked into some internal structure * and so is visible to ->quiesce(), so we don't need the refcount any more. */ +static bool is_suspended(struct mddev *mddev, struct bio *bio) +{ + if (mddev->suspended) + return true; + if (bio_data_dir(bio) != WRITE) + return false; + if (mddev->suspend_lo >= mddev->suspend_hi) + return false; + if (bio->bi_iter.bi_sector >= mddev->suspend_hi) + return false; + if (bio_end_sector(bio) < mddev->suspend_lo) + return false; + return true; +} + +void md_handle_request(struct mddev *mddev, struct bio *bio) +{ +check_suspended: + rcu_read_lock(); + if (is_suspended(mddev, bio)) { + DEFINE_WAIT(__wait); + for (;;) { + prepare_to_wait(&mddev->sb_wait, &__wait, + TASK_UNINTERRUPTIBLE); + if (!is_suspended(mddev, bio)) + break; + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + } + finish_wait(&mddev->sb_wait, &__wait); + } + atomic_inc(&mddev->active_io); + rcu_read_unlock(); + + if (!mddev->pers->make_request(mddev, bio)) { + atomic_dec(&mddev->active_io); + wake_up(&mddev->sb_wait); + goto check_suspended; + } + + if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) + wake_up(&mddev->sb_wait); +} +EXPORT_SYMBOL(md_handle_request); + static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); @@ -285,23 +331,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) bio_endio(bio); return BLK_QC_T_NONE; } -check_suspended: - rcu_read_lock(); - if (mddev->suspended) { - DEFINE_WAIT(__wait); - for (;;) { - prepare_to_wait(&mddev->sb_wait, &__wait, - TASK_UNINTERRUPTIBLE); - if (!mddev->suspended) - break; - rcu_read_unlock(); - schedule(); - rcu_read_lock(); - } - finish_wait(&mddev->sb_wait, &__wait); - } - atomic_inc(&mddev->active_io); - rcu_read_unlock(); /* * save the sectors now since our bio can @@ -310,20 +339,14 @@ check_suspended: sectors = bio_sectors(bio); /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; - if (!mddev->pers->make_request(mddev, bio)) { - atomic_dec(&mddev->active_io); - wake_up(&mddev->sb_wait); - goto check_suspended; - } + + md_handle_request(mddev, bio); cpu = part_stat_lock(); part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); part_stat_unlock(); - if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) - wake_up(&mddev->sb_wait); - return BLK_QC_T_NONE; } @@ -336,12 +359,17 @@ check_suspended: void mddev_suspend(struct mddev *mddev) { WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk); + lockdep_assert_held(&mddev->reconfig_mutex); if (mddev->suspended++) return; synchronize_rcu(); wake_up(&mddev->sb_wait); + set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); + smp_mb__after_atomic(); wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); mddev->pers->quiesce(mddev, 1); + clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags); + wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); del_timer_sync(&mddev->safemode_timer); } @@ -349,6 +377,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend); void mddev_resume(struct mddev *mddev) { + lockdep_assert_held(&mddev->reconfig_mutex); if (--mddev->suspended) return; wake_up(&mddev->sb_wait); @@ -439,16 +468,22 @@ static void md_submit_flush_data(struct work_struct *ws) struct mddev *mddev = container_of(ws, struct mddev, flush_work); struct bio *bio = mddev->flush_bio; + /* + * must reset flush_bio before calling into md_handle_request to avoid a + * deadlock, because other bios passed md_handle_request suspend check + * could wait for this and below md_handle_request could wait for those + * bios because of suspend check + */ + mddev->flush_bio = NULL; + wake_up(&mddev->sb_wait); + if (bio->bi_iter.bi_size == 0) /* an empty barrier - all done */ bio_endio(bio); else { bio->bi_opf &= ~REQ_PREFLUSH; - mddev->pers->make_request(mddev, bio); + md_handle_request(mddev, bio); } - - mddev->flush_bio = NULL; - wake_up(&mddev->sb_wait); } void md_flush_request(struct mddev *mddev, struct bio *bio) @@ -506,7 +541,7 @@ static void mddev_put(struct mddev *mddev) bioset_free(sync_bs); } -static void md_safemode_timeout(unsigned long data); +static void md_safemode_timeout(struct timer_list *t); void mddev_init(struct mddev *mddev) { @@ -515,8 +550,7 @@ void mddev_init(struct mddev *mddev) mutex_init(&mddev->bitmap_info.mutex); INIT_LIST_HEAD(&mddev->disks); INIT_LIST_HEAD(&mddev->all_mddevs); - setup_timer(&mddev->safemode_timer, md_safemode_timeout, - (unsigned long) mddev); + timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0); atomic_set(&mddev->active, 1); atomic_set(&mddev->openers, 0); atomic_set(&mddev->active_io, 0); @@ -649,6 +683,7 @@ void mddev_unlock(struct mddev *mddev) */ spin_lock(&pers_lock); md_wakeup_thread(mddev->thread); + wake_up(&mddev->sb_wait); spin_unlock(&pers_lock); } EXPORT_SYMBOL_GPL(mddev_unlock); @@ -2299,7 +2334,7 @@ static void export_array(struct mddev *mddev) static bool set_in_sync(struct mddev *mddev) { - WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock)); + lockdep_assert_held(&mddev->lock); if (!mddev->in_sync) { mddev->sync_checkers++; spin_unlock(&mddev->lock); @@ -2418,10 +2453,18 @@ repeat: } } - /* First make sure individual recovery_offsets are correct */ + /* + * First make sure individual recovery_offsets are correct + * curr_resync_completed can only be used during recovery. + * During reshape/resync it might use array-addresses rather + * that device addresses. + */ rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && mddev->delta_disks >= 0 && + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) && + !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && !test_bit(Journal, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && mddev->curr_resync_completed > rdev->recovery_offset) @@ -2637,7 +2680,7 @@ state_show(struct md_rdev *rdev, char *page) { char *sep = ","; size_t len = 0; - unsigned long flags = ACCESS_ONCE(rdev->flags); + unsigned long flags = READ_ONCE(rdev->flags); if (test_bit(Faulty, &flags) || (!test_bit(ExternalBbl, &flags) && @@ -4810,7 +4853,7 @@ suspend_lo_show(struct mddev *mddev, char *page) static ssize_t suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) { - unsigned long long old, new; + unsigned long long new; int err; err = kstrtoull(buf, 10, &new); @@ -4826,16 +4869,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers == NULL || mddev->pers->quiesce == NULL) goto unlock; - old = mddev->suspend_lo; + mddev_suspend(mddev); mddev->suspend_lo = new; - if (new >= old) - /* Shrinking suspended region */ - mddev->pers->quiesce(mddev, 2); - else { - /* Expanding suspended region - need to wait */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); - } + mddev_resume(mddev); + err = 0; unlock: mddev_unlock(mddev); @@ -4853,7 +4890,7 @@ suspend_hi_show(struct mddev *mddev, char *page) static ssize_t suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) { - unsigned long long old, new; + unsigned long long new; int err; err = kstrtoull(buf, 10, &new); @@ -4866,19 +4903,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) if (err) return err; err = -EINVAL; - if (mddev->pers == NULL || - mddev->pers->quiesce == NULL) + if (mddev->pers == NULL) goto unlock; - old = mddev->suspend_hi; + + mddev_suspend(mddev); mddev->suspend_hi = new; - if (new <= old) - /* Shrinking suspended region */ - mddev->pers->quiesce(mddev, 2); - else { - /* Expanding suspended region - need to wait */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); - } + mddev_resume(mddev); + err = 0; unlock: mddev_unlock(mddev); @@ -5343,7 +5374,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) return NULL; } -static int add_named_array(const char *val, struct kernel_param *kp) +static int add_named_array(const char *val, const struct kernel_param *kp) { /* * val must be "md_*" or "mdNNN". @@ -5372,9 +5403,9 @@ static int add_named_array(const char *val, struct kernel_param *kp) return -EINVAL; } -static void md_safemode_timeout(unsigned long data) +static void md_safemode_timeout(struct timer_list *t) { - struct mddev *mddev = (struct mddev *) data; + struct mddev *mddev = from_timer(mddev, t, safemode_timer); mddev->safemode = 1; if (mddev->external) @@ -5820,8 +5851,14 @@ void md_stop(struct mddev *mddev) * This is called from dm-raid */ __md_stop(mddev); - if (mddev->bio_set) + if (mddev->bio_set) { bioset_free(mddev->bio_set); + mddev->bio_set = NULL; + } + if (mddev->sync_set) { + bioset_free(mddev->sync_set); + mddev->sync_set = NULL; + } } EXPORT_SYMBOL_GPL(md_stop); @@ -6348,7 +6385,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) break; } } - if (has_journal) { + if (has_journal || mddev->bitmap) { export_rdev(rdev); return -EBUSY; } @@ -6604,22 +6641,26 @@ static int set_bitmap_file(struct mddev *mddev, int fd) return -ENOENT; /* cannot remove what isn't there */ err = 0; if (mddev->pers) { - mddev->pers->quiesce(mddev, 1); if (fd >= 0) { struct bitmap *bitmap; bitmap = bitmap_create(mddev, -1); + mddev_suspend(mddev); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; err = bitmap_load(mddev); } else err = PTR_ERR(bitmap); - } - if (fd < 0 || err) { + if (err) { + bitmap_destroy(mddev); + fd = -1; + } + mddev_resume(mddev); + } else if (fd < 0) { + mddev_suspend(mddev); bitmap_destroy(mddev); - fd = -1; /* make sure to put the file */ + mddev_resume(mddev); } - mddev->pers->quiesce(mddev, 0); } if (fd < 0) { struct file *f = mddev->bitmap_info.file; @@ -6721,7 +6762,7 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors) { - WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__); + lockdep_assert_held(&mddev->reconfig_mutex); if (mddev->external_size) return; @@ -6903,8 +6944,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.default_offset; mddev->bitmap_info.space = mddev->bitmap_info.default_space; - mddev->pers->quiesce(mddev, 1); bitmap = bitmap_create(mddev, -1); + mddev_suspend(mddev); if (!IS_ERR(bitmap)) { mddev->bitmap = bitmap; rv = bitmap_load(mddev); @@ -6912,7 +6953,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) rv = PTR_ERR(bitmap); if (rv) bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); + mddev_resume(mddev); } else { /* remove the bitmap */ if (!mddev->bitmap) { @@ -6935,9 +6976,9 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.nodes = 0; md_cluster_ops->leave(mddev); } - mddev->pers->quiesce(mddev, 1); + mddev_suspend(mddev); bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); + mddev_resume(mddev); mddev->bitmap_info.offset = 0; } } @@ -7454,8 +7495,8 @@ void md_wakeup_thread(struct md_thread *thread) { if (thread) { pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); - if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags)) - wake_up(&thread->wqueue); + set_bit(THREAD_WAKEUP, &thread->flags); + wake_up(&thread->wqueue); } } EXPORT_SYMBOL(md_wakeup_thread); @@ -7564,7 +7605,9 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) /* Still cleaning up */ resync = max_sectors; - } else + } else if (resync > max_sectors) + resync = max_sectors; + else resync -= atomic_read(&mddev->recovery_active); if (resync == 0) { @@ -8025,7 +8068,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); wait_event(mddev->sb_wait, - !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended); + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) || + mddev->suspended); if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { percpu_ref_put(&mddev->writes_pending); return false; @@ -8096,7 +8140,6 @@ void md_allow_write(struct mddev *mddev) sysfs_notify_dirent_safe(mddev->sysfs_state); /* wait for the dirty state to be recorded in the metadata */ wait_event(mddev->sb_wait, - !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) && !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); } else spin_unlock(&mddev->lock); @@ -8463,16 +8506,19 @@ void md_do_sync(struct md_thread *thread) } else { if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) mddev->curr_resync = MaxSector; - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < mddev->curr_resync) - rdev->recovery_offset = mddev->curr_resync; - rcu_read_unlock(); + if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) + if (rdev->raid_disk >= 0 && + mddev->delta_disks >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < mddev->curr_resync) + rdev->recovery_offset = mddev->curr_resync; + rcu_read_unlock(); + } } } skip: @@ -8799,6 +8845,16 @@ void md_check_recovery(struct mddev *mddev) unlock: wake_up(&mddev->sb_wait); mddev_unlock(mddev); + } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) { + /* Write superblock - thread that called mddev_suspend() + * holds reconfig_mutex for us. + */ + set_bit(MD_UPDATING_SB, &mddev->flags); + smp_mb__after_atomic(); + if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags)) + md_update_sb(mddev, 0); + clear_bit_unlock(MD_UPDATING_SB, &mddev->flags); + wake_up(&mddev->sb_wait); } } EXPORT_SYMBOL(md_check_recovery); @@ -9260,11 +9316,11 @@ static __exit void md_exit(void) subsys_initcall(md_init); module_exit(md_exit) -static int get_ro(char *buffer, struct kernel_param *kp) +static int get_ro(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "%d", start_readonly); } -static int set_ro(const char *val, struct kernel_param *kp) +static int set_ro(const char *val, const struct kernel_param *kp) { return kstrtouint(val, 10, (unsigned int *)&start_readonly); } |