diff options
-rw-r--r-- | drivers/md/md.c | 41 | ||||
-rw-r--r-- | drivers/md/md.h | 21 | ||||
-rw-r--r-- | drivers/md/raid1.c | 68 |
3 files changed, 129 insertions, 1 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 1f37a1adc926..9a9762c83cc8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -124,6 +124,19 @@ static inline int speed_max(struct mddev *mddev) mddev->sync_speed_max : sysctl_speed_limit_max; } +static int rdev_init_wb(struct md_rdev *rdev) +{ + if (rdev->bdev->bd_queue->nr_hw_queues == 1) + return 0; + + spin_lock_init(&rdev->wb_list_lock); + INIT_LIST_HEAD(&rdev->wb_list); + init_waitqueue_head(&rdev->wb_io_wait); + set_bit(WBCollisionCheck, &rdev->flags); + + return 1; +} + static struct ctl_table_header *raid_table_header; static struct ctl_table raid_table[] = { @@ -5597,6 +5610,32 @@ int md_run(struct mddev *mddev) md_bitmap_destroy(mddev); goto abort; } + + if (mddev->bitmap_info.max_write_behind > 0) { + bool creat_pool = false; + + rdev_for_each(rdev, mddev) { + if (test_bit(WriteMostly, &rdev->flags) && + rdev_init_wb(rdev)) + creat_pool = true; + } + if (creat_pool && mddev->wb_info_pool == NULL) { + mddev->wb_info_pool = + mempool_create_kmalloc_pool(NR_WB_INFOS, + sizeof(struct wb_info)); + if (!mddev->wb_info_pool) { + err = -ENOMEM; + mddev_detach(mddev); + if (mddev->private) + pers->free(mddev, mddev->private); + mddev->private = NULL; + module_put(pers->owner); + md_bitmap_destroy(mddev); + goto abort; + } + } + } + if (mddev->queue) { bool nonrot = true; @@ -5825,6 +5864,8 @@ static void __md_stop_writes(struct mddev *mddev) mddev->in_sync = 1; md_update_sb(mddev, 1); } + mempool_destroy(mddev->wb_info_pool); + mddev->wb_info_pool = NULL; } void md_stop_writes(struct mddev *mddev) diff --git a/drivers/md/md.h b/drivers/md/md.h index 7c930c091193..d449d514cff9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -109,6 +109,14 @@ struct md_rdev { * for reporting to userspace and storing * in superblock. */ + + /* + * The members for check collision of write behind IOs. + */ + struct list_head wb_list; + spinlock_t wb_list_lock; + wait_queue_head_t wb_io_wait; + struct work_struct del_work; /* used for delayed sysfs removal */ struct kernfs_node *sysfs_state; /* handle for 'state' @@ -193,6 +201,10 @@ enum flag_bits { * it didn't fail, so don't use FailFast * any more for metadata */ + WBCollisionCheck, /* + * multiqueue device should check if there + * is collision between write behind bios. + */ }; static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, @@ -245,6 +257,14 @@ enum mddev_sb_flags { MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */ }; +#define NR_WB_INFOS 8 +/* record current range of write behind IOs */ +struct wb_info { + sector_t lo; + sector_t hi; + struct list_head list; +}; + struct mddev { void *private; struct md_personality *pers; @@ -461,6 +481,7 @@ struct mddev { */ struct work_struct flush_work; struct work_struct event_work; /* used by dm to report failure event */ + mempool_t *wb_info_pool; void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); struct md_cluster_info *cluster_info; unsigned int good_device_nr; /* good device num within cluster raid */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a7860b5f33f2..3d44da663797 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -50,6 +50,57 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); #include "raid1-10.c" +static int check_and_add_wb(struct md_rdev *rdev, sector_t lo, sector_t hi) +{ + struct wb_info *wi, *temp_wi; + unsigned long flags; + int ret = 0; + struct mddev *mddev = rdev->mddev; + + wi = mempool_alloc(mddev->wb_info_pool, GFP_NOIO); + + spin_lock_irqsave(&rdev->wb_list_lock, flags); + list_for_each_entry(temp_wi, &rdev->wb_list, list) { + /* collision happened */ + if (hi > temp_wi->lo && lo < temp_wi->hi) { + ret = -EBUSY; + break; + } + } + + if (!ret) { + wi->lo = lo; + wi->hi = hi; + list_add(&wi->list, &rdev->wb_list); + } else + mempool_free(wi, mddev->wb_info_pool); + spin_unlock_irqrestore(&rdev->wb_list_lock, flags); + + return ret; +} + +static void remove_wb(struct md_rdev *rdev, sector_t lo, sector_t hi) +{ + struct wb_info *wi; + unsigned long flags; + int found = 0; + struct mddev *mddev = rdev->mddev; + + spin_lock_irqsave(&rdev->wb_list_lock, flags); + list_for_each_entry(wi, &rdev->wb_list, list) + if (hi == wi->hi && lo == wi->lo) { + list_del(&wi->list); + mempool_free(wi, mddev->wb_info_pool); + found = 1; + break; + } + + if (!found) + WARN_ON("The write behind IO is not recorded\n"); + spin_unlock_irqrestore(&rdev->wb_list_lock, flags); + wake_up(&rdev->wb_io_wait); +} + /* * for resync bio, r1bio pointer can be retrieved from the per-bio * 'struct resync_pages'. @@ -446,6 +497,12 @@ static void raid1_end_write_request(struct bio *bio) } if (behind) { + if (test_bit(WBCollisionCheck, &rdev->flags)) { + sector_t lo = r1_bio->sector; + sector_t hi = r1_bio->sector + r1_bio->sectors; + + remove_wb(rdev, lo, hi); + } if (test_bit(WriteMostly, &rdev->flags)) atomic_dec(&r1_bio->behind_remaining); @@ -1443,7 +1500,16 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); if (r1_bio->behind_master_bio) { - if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) + struct md_rdev *rdev = conf->mirrors[i].rdev; + + if (test_bit(WBCollisionCheck, &rdev->flags)) { + sector_t lo = r1_bio->sector; + sector_t hi = r1_bio->sector + r1_bio->sectors; + + wait_event(rdev->wb_io_wait, + check_and_add_wb(rdev, lo, hi) == 0); + } + if (test_bit(WriteMostly, &rdev->flags)) atomic_inc(&r1_bio->behind_remaining); } |