diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-09 22:01:36 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-09 22:01:36 +0300 |
commit | d60dafdca4b463405e5586df923f05b10e9ac2f9 (patch) | |
tree | eb87cb614a64615eb0a1a0e32f62ad28e705e68c /drivers/md/md.c | |
parent | 1329c20433fb1298dd0bf94b59c1a11f27a1f2e2 (diff) | |
parent | 5a409b4f56d50b212334f338cb8465d65550cd85 (diff) | |
download | linux-d60dafdca4b463405e5586df923f05b10e9ac2f9.tar.xz |
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
"A few fixes of MD for this merge window. Mostly bug fixes:
- raid5 stripe batch fix from Amy
- Read error handling for raid1 FailFast device from Gioh
- raid10 recovery NULL pointer dereference fix from Guoqing
- Support write hint for raid5 stripe cache from Mariusz
- Fixes for device hot add/remove from Neil and Yufen
- Improve flush bio scalability from Xiao"
* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
MD: fix lock contention for flush bios
md/raid5: Assigning NULL to sh->batch_head before testing bit R5_Overlap of a stripe
md/raid1: add error handling of read error from FailFast device
md: fix NULL dereference of mddev->pers in remove_and_add_spares()
raid5: copy write hint from origin bio to stripe
md: fix two problems with setting the "re-add" device state.
raid10: check bio in r10buf_pool_free to void NULL pointer dereference
md: fix an error code format and remove unsed bio_sector
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 169 |
1 files changed, 115 insertions, 54 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 22203eba1e6e..29b0cd9ec951 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -132,6 +132,24 @@ static inline int speed_max(struct mddev *mddev) mddev->sync_speed_max : sysctl_speed_limit_max; } +static void * flush_info_alloc(gfp_t gfp_flags, void *data) +{ + return kzalloc(sizeof(struct flush_info), gfp_flags); +} +static void flush_info_free(void *flush_info, void *data) +{ + kfree(flush_info); +} + +static void * flush_bio_alloc(gfp_t gfp_flags, void *data) +{ + return kzalloc(sizeof(struct flush_bio), gfp_flags); +} +static void flush_bio_free(void *flush_bio, void *data) +{ + kfree(flush_bio); +} + static struct ctl_table_header *raid_table_header; static struct ctl_table raid_table[] = { @@ -414,30 +432,53 @@ static int md_congested(void *data, int bits) /* * Generic flush handling for md */ +static void submit_flushes(struct work_struct *ws) +{ + struct flush_info *fi = container_of(ws, struct flush_info, flush_work); + struct mddev *mddev = fi->mddev; + struct bio *bio = fi->bio; + + bio->bi_opf &= ~REQ_PREFLUSH; + md_handle_request(mddev, bio); + + mempool_free(fi, mddev->flush_pool); +} -static void md_end_flush(struct bio *bio) +static void md_end_flush(struct bio *fbio) { - struct md_rdev *rdev = bio->bi_private; - struct mddev *mddev = rdev->mddev; + struct flush_bio *fb = fbio->bi_private; + struct md_rdev *rdev = fb->rdev; + struct flush_info *fi = fb->fi; + struct bio *bio = fi->bio; + struct mddev *mddev = fi->mddev; rdev_dec_pending(rdev, mddev); - if (atomic_dec_and_test(&mddev->flush_pending)) { - /* The pre-request flush has finished */ - queue_work(md_wq, &mddev->flush_work); + if (atomic_dec_and_test(&fi->flush_pending)) { + if (bio->bi_iter.bi_size == 0) + /* an empty barrier - all done */ + bio_endio(bio); + else { + INIT_WORK(&fi->flush_work, submit_flushes); + queue_work(md_wq, &fi->flush_work); + } } - bio_put(bio); -} -static void md_submit_flush_data(struct work_struct *ws); + mempool_free(fb, mddev->flush_bio_pool); + bio_put(fbio); +} -static void submit_flushes(struct work_struct *ws) +void md_flush_request(struct mddev *mddev, struct bio *bio) { - struct mddev *mddev = container_of(ws, struct mddev, flush_work); struct md_rdev *rdev; + struct flush_info *fi; + + fi = mempool_alloc(mddev->flush_pool, GFP_NOIO); + + fi->bio = bio; + fi->mddev = mddev; + atomic_set(&fi->flush_pending, 1); - INIT_WORK(&mddev->flush_work, md_submit_flush_data); - atomic_set(&mddev->flush_pending, 1); rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) if (rdev->raid_disk >= 0 && @@ -447,59 +488,39 @@ static void submit_flushes(struct work_struct *ws) * we reclaim rcu_read_lock */ struct bio *bi; + struct flush_bio *fb; atomic_inc(&rdev->nr_pending); atomic_inc(&rdev->nr_pending); rcu_read_unlock(); + + fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO); + fb->fi = fi; + fb->rdev = rdev; + bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); - bi->bi_end_io = md_end_flush; - bi->bi_private = rdev; bio_set_dev(bi, rdev->bdev); + bi->bi_end_io = md_end_flush; + bi->bi_private = fb; bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - atomic_inc(&mddev->flush_pending); + + atomic_inc(&fi->flush_pending); submit_bio(bi); + rcu_read_lock(); rdev_dec_pending(rdev, mddev); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) - queue_work(md_wq, &mddev->flush_work); -} - -static void md_submit_flush_data(struct work_struct *ws) -{ - struct mddev *mddev = container_of(ws, struct mddev, flush_work); - struct bio *bio = mddev->flush_bio; - /* - * must reset flush_bio before calling into md_handle_request to avoid a - * deadlock, because other bios passed md_handle_request suspend check - * could wait for this and below md_handle_request could wait for those - * bios because of suspend check - */ - mddev->flush_bio = NULL; - wake_up(&mddev->sb_wait); - - if (bio->bi_iter.bi_size == 0) - /* an empty barrier - all done */ - bio_endio(bio); - else { - bio->bi_opf &= ~REQ_PREFLUSH; - md_handle_request(mddev, bio); + if (atomic_dec_and_test(&fi->flush_pending)) { + if (bio->bi_iter.bi_size == 0) + /* an empty barrier - all done */ + bio_endio(bio); + else { + INIT_WORK(&fi->flush_work, submit_flushes); + queue_work(md_wq, &fi->flush_work); + } } } - -void md_flush_request(struct mddev *mddev, struct bio *bio) -{ - spin_lock_irq(&mddev->lock); - wait_event_lock_irq(mddev->sb_wait, - !mddev->flush_bio, - mddev->lock); - mddev->flush_bio = bio; - spin_unlock_irq(&mddev->lock); - - INIT_WORK(&mddev->flush_work, submit_flushes); - queue_work(md_wq, &mddev->flush_work); -} EXPORT_SYMBOL(md_flush_request); static inline struct mddev *mddev_get(struct mddev *mddev) @@ -546,7 +567,6 @@ void mddev_init(struct mddev *mddev) atomic_set(&mddev->openers, 0); atomic_set(&mddev->active_io, 0); spin_lock_init(&mddev->lock); - atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); init_waitqueue_head(&mddev->recovery_wait); mddev->reshape_position = MaxSector; @@ -2844,7 +2864,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) err = 0; } } else if (cmd_match(buf, "re-add")) { - if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) { + if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) && + rdev->saved_raid_disk >= 0) { /* clear_bit is performed _after_ all the devices * have their local Faulty bit cleared. If any writes * happen in the meantime in the local node, they @@ -5499,6 +5520,22 @@ int md_run(struct mddev *mddev) if (err) return err; } + if (mddev->flush_pool == NULL) { + mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc, + flush_info_free, mddev); + if (!mddev->flush_pool) { + err = -ENOMEM; + goto abort; + } + } + if (mddev->flush_bio_pool == NULL) { + mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc, + flush_bio_free, mddev); + if (!mddev->flush_bio_pool) { + err = -ENOMEM; + goto abort; + } + } spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -5654,6 +5691,18 @@ int md_run(struct mddev *mddev) sysfs_notify_dirent_safe(mddev->sysfs_action); sysfs_notify(&mddev->kobj, NULL, "degraded"); return 0; + +abort: + if (mddev->flush_bio_pool) { + mempool_destroy(mddev->flush_bio_pool); + mddev->flush_bio_pool = NULL; + } + if (mddev->flush_pool){ + mempool_destroy(mddev->flush_pool); + mddev->flush_pool = NULL; + } + + return err; } EXPORT_SYMBOL_GPL(md_run); @@ -5864,6 +5913,14 @@ void md_stop(struct mddev *mddev) * This is called from dm-raid */ __md_stop(mddev); + if (mddev->flush_bio_pool) { + mempool_destroy(mddev->flush_bio_pool); + mddev->flush_bio_pool = NULL; + } + if (mddev->flush_pool) { + mempool_destroy(mddev->flush_pool); + mddev->flush_pool = NULL; + } bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); } @@ -6494,6 +6551,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev) char b[BDEVNAME_SIZE]; struct md_rdev *rdev; + if (!mddev->pers) + return -ENODEV; + rdev = find_rdev(mddev, dev); if (!rdev) return -ENXIO; @@ -8611,6 +8671,7 @@ static int remove_and_add_spares(struct mddev *mddev, if (mddev->pers->hot_remove_disk( mddev, rdev) == 0) { sysfs_unlink_rdev(mddev, rdev); + rdev->saved_raid_disk = rdev->raid_disk; rdev->raid_disk = -1; removed++; } |