From 3e148a3209792e04f63ec99701235c960765fc9a Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 19 Jun 2019 17:30:46 +0800 Subject: md/raid1: fix potential data inconsistency issue with write behind device For write-behind mode, we think write IO is complete once it has reached all the non-writemostly devices. It works fine for single queue devices. But for multiqueue device, if there are lots of IOs come from upper layer, then the write-behind device could issue those IOs to different queues, depends on the each queue's delay, so there is no guarantee that those IOs can arrive in order. To address the issue, we need to check the collision among write behind IOs, we can only continue without collision, otherwise wait for the completion of previous collisioned IO. And WBCollision is introduced for multiqueue device which is worked under write-behind mode. But this patch doesn't handle below cases which could have the data inconsistency issue as well, these cases will be handled in later patches. 1. modify max_write_behind by write backlog node. 2. add or remove array's bitmap dynamically. 3. the change of member disk. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu --- drivers/md/md.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 1f37a1adc926..9a9762c83cc8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -124,6 +124,19 @@ static inline int speed_max(struct mddev *mddev) mddev->sync_speed_max : sysctl_speed_limit_max; } +static int rdev_init_wb(struct md_rdev *rdev) +{ + if (rdev->bdev->bd_queue->nr_hw_queues == 1) + return 0; + + spin_lock_init(&rdev->wb_list_lock); + INIT_LIST_HEAD(&rdev->wb_list); + init_waitqueue_head(&rdev->wb_io_wait); + set_bit(WBCollisionCheck, &rdev->flags); + + return 1; +} + static struct ctl_table_header *raid_table_header; static struct ctl_table raid_table[] = { @@ -5597,6 +5610,32 @@ int md_run(struct mddev *mddev) md_bitmap_destroy(mddev); goto abort; } + + if (mddev->bitmap_info.max_write_behind > 0) { + bool creat_pool = false; + + rdev_for_each(rdev, mddev) { + if (test_bit(WriteMostly, &rdev->flags) && + rdev_init_wb(rdev)) + creat_pool = true; + } + if (creat_pool && mddev->wb_info_pool == NULL) { + mddev->wb_info_pool = + mempool_create_kmalloc_pool(NR_WB_INFOS, + sizeof(struct wb_info)); + if (!mddev->wb_info_pool) { + err = -ENOMEM; + mddev_detach(mddev); + if (mddev->private) + pers->free(mddev, mddev->private); + mddev->private = NULL; + module_put(pers->owner); + md_bitmap_destroy(mddev); + goto abort; + } + } + } + if (mddev->queue) { bool nonrot = true; @@ -5825,6 +5864,8 @@ static void __md_stop_writes(struct mddev *mddev) mddev->in_sync = 1; md_update_sb(mddev, 1); } + mempool_destroy(mddev->wb_info_pool); + mddev->wb_info_pool = NULL; } void md_stop_writes(struct mddev *mddev) -- cgit v1.2.3