diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 71 |
1 files changed, 46 insertions, 25 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 775f1dde190a..927a43db5dfb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -380,6 +380,10 @@ EXPORT_SYMBOL_GPL(md_new_event); static LIST_HEAD(all_mddevs); static DEFINE_SPINLOCK(all_mddevs_lock); +static bool is_md_suspended(struct mddev *mddev) +{ + return percpu_ref_is_dying(&mddev->active_io); +} /* Rather than calling directly into the personality make_request function, * IO requests come here first so that we can check if the device is * being suspended pending a reconfiguration. @@ -389,7 +393,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock); */ static bool is_suspended(struct mddev *mddev, struct bio *bio) { - if (mddev->suspended) + if (is_md_suspended(mddev)) return true; if (bio_data_dir(bio) != WRITE) return false; @@ -405,12 +409,10 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio) void md_handle_request(struct mddev *mddev, struct bio *bio) { check_suspended: - rcu_read_lock(); if (is_suspended(mddev, bio)) { DEFINE_WAIT(__wait); /* Bail out if REQ_NOWAIT is set for the bio */ if (bio->bi_opf & REQ_NOWAIT) { - rcu_read_unlock(); bio_wouldblock_error(bio); return; } @@ -419,23 +421,19 @@ check_suspended: TASK_UNINTERRUPTIBLE); if (!is_suspended(mddev, bio)) break; - rcu_read_unlock(); schedule(); - rcu_read_lock(); } finish_wait(&mddev->sb_wait, &__wait); } - atomic_inc(&mddev->active_io); - rcu_read_unlock(); + if (!percpu_ref_tryget_live(&mddev->active_io)) + goto check_suspended; if (!mddev->pers->make_request(mddev, bio)) { - atomic_dec(&mddev->active_io); - wake_up(&mddev->sb_wait); + percpu_ref_put(&mddev->active_io); goto check_suspended; } - if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) - wake_up(&mddev->sb_wait); + percpu_ref_put(&mddev->active_io); } EXPORT_SYMBOL(md_handle_request); @@ -455,6 +453,8 @@ static void md_submit_bio(struct bio *bio) } bio = bio_split_to_limits(bio); + if (!bio) + return; if (mddev->ro == MD_RDONLY && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) @@ -481,11 +481,10 @@ void mddev_suspend(struct mddev *mddev) lockdep_assert_held(&mddev->reconfig_mutex); if (mddev->suspended++) return; - synchronize_rcu(); wake_up(&mddev->sb_wait); set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags); - smp_mb__after_atomic(); - wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); + percpu_ref_kill(&mddev->active_io); + wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io)); mddev->pers->quiesce(mddev, 1); clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags); wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); @@ -503,6 +502,7 @@ void mddev_resume(struct mddev *mddev) lockdep_assert_held(&mddev->reconfig_mutex); if (--mddev->suspended) return; + percpu_ref_resurrect(&mddev->active_io); wake_up(&mddev->sb_wait); mddev->pers->quiesce(mddev, 0); @@ -681,7 +681,6 @@ void mddev_init(struct mddev *mddev) timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0); atomic_set(&mddev->active, 1); atomic_set(&mddev->openers, 0); - atomic_set(&mddev->active_io, 0); spin_lock_init(&mddev->lock); atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); @@ -3642,7 +3641,7 @@ EXPORT_SYMBOL_GPL(md_rdev_init); */ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { - static struct md_rdev *claim_rdev; /* just for claiming the bdev */ + static struct md_rdev claim_rdev; /* just for claiming the bdev */ struct md_rdev *rdev; sector_t size; int err; @@ -3660,7 +3659,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe rdev->bdev = blkdev_get_by_dev(newdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - super_format == -2 ? claim_rdev : rdev); + super_format == -2 ? &claim_rdev : rdev); if (IS_ERR(rdev->bdev)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", MAJOR(newdev), MINOR(newdev)); @@ -5758,6 +5757,12 @@ static void md_safemode_timeout(struct timer_list *t) } static int start_dirty_degraded; +static void active_io_release(struct percpu_ref *ref) +{ + struct mddev *mddev = container_of(ref, struct mddev, active_io); + + wake_up(&mddev->sb_wait); +} int md_run(struct mddev *mddev) { @@ -5838,10 +5843,15 @@ int md_run(struct mddev *mddev) nowait = nowait && bdev_nowait(rdev->bdev); } + err = percpu_ref_init(&mddev->active_io, active_io_release, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); + if (err) + return err; + if (!bioset_initialized(&mddev->bio_set)) { err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (err) - return err; + goto exit_active_io; } if (!bioset_initialized(&mddev->sync_set)) { err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); @@ -6029,6 +6039,8 @@ abort: bioset_exit(&mddev->sync_set); exit_bio_set: bioset_exit(&mddev->bio_set); +exit_active_io: + percpu_ref_exit(&mddev->active_io); return err; } EXPORT_SYMBOL_GPL(md_run); @@ -6154,7 +6166,7 @@ static void md_clean(struct mddev *mddev) mddev->new_level = LEVEL_NONE; mddev->new_layout = 0; mddev->new_chunk_sectors = 0; - mddev->curr_resync = 0; + mddev->curr_resync = MD_RESYNC_NONE; atomic64_set(&mddev->resync_mismatches, 0); mddev->suspend_lo = mddev->suspend_hi = 0; mddev->sync_speed_min = mddev->sync_speed_max = 0; @@ -6217,7 +6229,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes); static void mddev_detach(struct mddev *mddev) { md_bitmap_wait_behind_writes(mddev); - if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) { + if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) { mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 0); } @@ -6253,6 +6265,8 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); + percpu_ref_exit(&mddev->writes_pending); + percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); } @@ -7826,6 +7840,7 @@ static void md_free_disk(struct gendisk *disk) struct mddev *mddev = disk->private_data; percpu_ref_exit(&mddev->writes_pending); + percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); @@ -8529,7 +8544,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) return true; wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) || - mddev->suspended); + is_md_suspended(mddev)); if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { percpu_ref_put(&mddev->writes_pending); return false; @@ -8613,12 +8628,15 @@ static void md_end_io_acct(struct bio *bio) { struct md_io_acct *md_io_acct = bio->bi_private; struct bio *orig_bio = md_io_acct->orig_bio; + struct mddev *mddev = md_io_acct->mddev; orig_bio->bi_status = bio->bi_status; bio_end_io_acct(orig_bio, md_io_acct->start_time); bio_put(bio); bio_endio(orig_bio); + + percpu_ref_put(&mddev->active_io); } /* @@ -8634,10 +8652,13 @@ void md_account_bio(struct mddev *mddev, struct bio **bio) if (!blk_queue_io_stat(bdev->bd_disk->queue)) return; + percpu_ref_get(&mddev->active_io); + clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set); md_io_acct = container_of(clone, struct md_io_acct, bio_clone); md_io_acct->orig_bio = *bio; md_io_acct->start_time = bio_start_io_acct(*bio); + md_io_acct->mddev = mddev; clone->bi_end_io = md_end_io_acct; clone->bi_private = md_io_acct; @@ -8881,7 +8902,7 @@ void md_do_sync(struct md_thread *thread) atomic_set(&mddev->recovery_active, 0); last_check = 0; - if (j>2) { + if (j >= MD_RESYNC_ACTIVE) { pr_debug("md: resuming %s of %s from checkpoint.\n", desc, mdname(mddev)); mddev->curr_resync = j; @@ -8953,7 +8974,7 @@ void md_do_sync(struct md_thread *thread) if (j > max_sectors) /* when skipping, extra large numbers can be returned. */ j = max_sectors; - if (j > 2) + if (j >= MD_RESYNC_ACTIVE) mddev->curr_resync = j; mddev->curr_mark_cnt = io_sectors; if (last_check == 0) @@ -9028,7 +9049,7 @@ void md_do_sync(struct md_thread *thread) mddev->pers->sync_request(mddev, max_sectors, &skipped); if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync >= MD_RESYNC_ACTIVE) { + mddev->curr_resync > MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { if (mddev->curr_resync >= mddev->recovery_cp) { @@ -9257,7 +9278,7 @@ void md_check_recovery(struct mddev *mddev) wake_up(&mddev->sb_wait); } - if (mddev->suspended) + if (is_md_suspended(mddev)) return; if (mddev->bitmap) |