diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 254 |
1 files changed, 199 insertions, 55 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 4e7c9f398bc6..4824d50526fa 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -125,74 +125,165 @@ static inline int speed_max(struct mddev *mddev) mddev->sync_speed_max : sysctl_speed_limit_max; } -static int rdev_init_wb(struct md_rdev *rdev) +static void rdev_uninit_serial(struct md_rdev *rdev) { - if (rdev->bdev->bd_queue->nr_hw_queues == 1) + if (!test_and_clear_bit(CollisionCheck, &rdev->flags)) + return; + + kvfree(rdev->serial); + rdev->serial = NULL; +} + +static void rdevs_uninit_serial(struct mddev *mddev) +{ + struct md_rdev *rdev; + + rdev_for_each(rdev, mddev) + rdev_uninit_serial(rdev); +} + +static int rdev_init_serial(struct md_rdev *rdev) +{ + /* serial_nums equals with BARRIER_BUCKETS_NR */ + int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t)))); + struct serial_in_rdev *serial = NULL; + + if (test_bit(CollisionCheck, &rdev->flags)) return 0; - spin_lock_init(&rdev->wb_list_lock); - INIT_LIST_HEAD(&rdev->wb_list); - init_waitqueue_head(&rdev->wb_io_wait); - set_bit(WBCollisionCheck, &rdev->flags); + serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums, + GFP_KERNEL); + if (!serial) + return -ENOMEM; - return 1; + for (i = 0; i < serial_nums; i++) { + struct serial_in_rdev *serial_tmp = &serial[i]; + + spin_lock_init(&serial_tmp->serial_lock); + serial_tmp->serial_rb = RB_ROOT_CACHED; + init_waitqueue_head(&serial_tmp->serial_io_wait); + } + + rdev->serial = serial; + set_bit(CollisionCheck, &rdev->flags); + + return 0; +} + +static int rdevs_init_serial(struct mddev *mddev) +{ + struct md_rdev *rdev; + int ret = 0; + + rdev_for_each(rdev, mddev) { + ret = rdev_init_serial(rdev); + if (ret) + break; + } + + /* Free all resources if pool is not existed */ + if (ret && !mddev->serial_info_pool) + rdevs_uninit_serial(mddev); + + return ret; } /* - * Create wb_info_pool if rdev is the first multi-queue device flaged - * with writemostly, also write-behind mode is enabled. + * rdev needs to enable serial stuffs if it meets the conditions: + * 1. it is multi-queue device flaged with writemostly. + * 2. the write-behind mode is enabled. */ -void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev, - bool is_suspend) +static int rdev_need_serial(struct md_rdev *rdev) { - if (mddev->bitmap_info.max_write_behind == 0) - return; + return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 && + rdev->bdev->bd_queue->nr_hw_queues != 1 && + test_bit(WriteMostly, &rdev->flags)); +} + +/* + * Init resource for rdev(s), then create serial_info_pool if: + * 1. rdev is the first device which return true from rdev_enable_serial. + * 2. rdev is NULL, means we want to enable serialization for all rdevs. + */ +void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev, + bool is_suspend) +{ + int ret = 0; - if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev)) + if (rdev && !rdev_need_serial(rdev) && + !test_bit(CollisionCheck, &rdev->flags)) return; - if (mddev->wb_info_pool == NULL) { + if (!is_suspend) + mddev_suspend(mddev); + + if (!rdev) + ret = rdevs_init_serial(mddev); + else + ret = rdev_init_serial(rdev); + if (ret) + goto abort; + + if (mddev->serial_info_pool == NULL) { unsigned int noio_flag; - if (!is_suspend) - mddev_suspend(mddev); noio_flag = memalloc_noio_save(); - mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS, - sizeof(struct wb_info)); + mddev->serial_info_pool = + mempool_create_kmalloc_pool(NR_SERIAL_INFOS, + sizeof(struct serial_info)); memalloc_noio_restore(noio_flag); - if (!mddev->wb_info_pool) - pr_err("can't alloc memory pool for writemostly\n"); - if (!is_suspend) - mddev_resume(mddev); + if (!mddev->serial_info_pool) { + rdevs_uninit_serial(mddev); + pr_err("can't alloc memory pool for serialization\n"); + } } + +abort: + if (!is_suspend) + mddev_resume(mddev); } -EXPORT_SYMBOL_GPL(mddev_create_wb_pool); /* - * destroy wb_info_pool if rdev is the last device flaged with WBCollisionCheck. + * Free resource from rdev(s), and destroy serial_info_pool under conditions: + * 1. rdev is the last device flaged with CollisionCheck. + * 2. when bitmap is destroyed while policy is not enabled. + * 3. for disable policy, the pool is destroyed only when no rdev needs it. */ -static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev) +void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev, + bool is_suspend) { - if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags)) + if (rdev && !test_bit(CollisionCheck, &rdev->flags)) return; - if (mddev->wb_info_pool) { + if (mddev->serial_info_pool) { struct md_rdev *temp; - int num = 0; + int num = 0; /* used to track if other rdevs need the pool */ - /* - * Check if other rdevs need wb_info_pool. - */ - rdev_for_each(temp, mddev) - if (temp != rdev && - test_bit(WBCollisionCheck, &temp->flags)) + if (!is_suspend) + mddev_suspend(mddev); + rdev_for_each(temp, mddev) { + if (!rdev) { + if (!mddev->serialize_policy || + !rdev_need_serial(temp)) + rdev_uninit_serial(temp); + else + num++; + } else if (temp != rdev && + test_bit(CollisionCheck, &temp->flags)) num++; - if (!num) { - mddev_suspend(rdev->mddev); - mempool_destroy(mddev->wb_info_pool); - mddev->wb_info_pool = NULL; - mddev_resume(rdev->mddev); } + + if (rdev) + rdev_uninit_serial(rdev); + + if (num) + pr_info("The mempool could be used by other devices\n"); + else { + mempool_destroy(mddev->serial_info_pool); + mddev->serial_info_pool = NULL; + } + if (!is_suspend) + mddev_resume(mddev); } } @@ -2337,7 +2428,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) pr_debug("md: bind<%s>\n", b); if (mddev->raid_disks) - mddev_create_wb_pool(mddev, rdev, false); + mddev_create_serial_pool(mddev, rdev, false); if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; @@ -2375,7 +2466,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b)); - mddev_destroy_wb_pool(rdev->mddev, rdev); + mddev_destroy_serial_pool(rdev->mddev, rdev, false); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); @@ -2888,10 +2979,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } } else if (cmd_match(buf, "writemostly")) { set_bit(WriteMostly, &rdev->flags); - mddev_create_wb_pool(rdev->mddev, rdev, false); + mddev_create_serial_pool(rdev->mddev, rdev, false); err = 0; } else if (cmd_match(buf, "-writemostly")) { - mddev_destroy_wb_pool(rdev->mddev, rdev); + mddev_destroy_serial_pool(rdev->mddev, rdev, false); clear_bit(WriteMostly, &rdev->flags); err = 0; } else if (cmd_match(buf, "blocked")) { @@ -5277,6 +5368,57 @@ static struct md_sysfs_entry md_fail_last_dev = __ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show, fail_last_dev_store); +static ssize_t serialize_policy_show(struct mddev *mddev, char *page) +{ + if (mddev->pers == NULL || (mddev->pers->level != 1)) + return sprintf(page, "n/a\n"); + else + return sprintf(page, "%d\n", mddev->serialize_policy); +} + +/* + * Setting serialize_policy to true to enforce write IO is not reordered + * for raid1. + */ +static ssize_t +serialize_policy_store(struct mddev *mddev, const char *buf, size_t len) +{ + int err; + bool value; + + err = kstrtobool(buf, &value); + if (err) + return err; + + if (value == mddev->serialize_policy) + return len; + + err = mddev_lock(mddev); + if (err) + return err; + if (mddev->pers == NULL || (mddev->pers->level != 1)) { + pr_err("md: serialize_policy is only effective for raid1\n"); + err = -EINVAL; + goto unlock; + } + + mddev_suspend(mddev); + if (value) + mddev_create_serial_pool(mddev, NULL, true); + else + mddev_destroy_serial_pool(mddev, NULL, true); + mddev->serialize_policy = value; + mddev_resume(mddev); +unlock: + mddev_unlock(mddev); + return err ?: len; +} + +static struct md_sysfs_entry md_serialize_policy = +__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show, + serialize_policy_store); + + static struct attribute *md_default_attrs[] = { &md_level.attr, &md_layout.attr, @@ -5294,6 +5436,7 @@ static struct attribute *md_default_attrs[] = { &max_corr_read_errors.attr, &md_consistency_policy.attr, &md_fail_last_dev.attr, + &md_serialize_policy.attr, NULL, }; @@ -5769,18 +5912,18 @@ int md_run(struct mddev *mddev) goto bitmap_abort; if (mddev->bitmap_info.max_write_behind > 0) { - bool creat_pool = false; + bool create_pool = false; rdev_for_each(rdev, mddev) { if (test_bit(WriteMostly, &rdev->flags) && - rdev_init_wb(rdev)) - creat_pool = true; - } - if (creat_pool && mddev->wb_info_pool == NULL) { - mddev->wb_info_pool = - mempool_create_kmalloc_pool(NR_WB_INFOS, - sizeof(struct wb_info)); - if (!mddev->wb_info_pool) { + rdev_init_serial(rdev)) + create_pool = true; + } + if (create_pool && mddev->serial_info_pool == NULL) { + mddev->serial_info_pool = + mempool_create_kmalloc_pool(NR_SERIAL_INFOS, + sizeof(struct serial_info)); + if (!mddev->serial_info_pool) { err = -ENOMEM; goto bitmap_abort; } @@ -6025,8 +6168,9 @@ static void __md_stop_writes(struct mddev *mddev) mddev->in_sync = 1; md_update_sb(mddev, 1); } - mempool_destroy(mddev->wb_info_pool); - mddev->wb_info_pool = NULL; + /* disable policy to guarantee rdevs free resources for serialization */ + mddev->serialize_policy = 0; + mddev_destroy_serial_pool(mddev, NULL, true); } void md_stop_writes(struct mddev *mddev) |