diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-07 19:45:43 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-07 19:45:43 +0300 |
commit | c23112e0395a89c8a52cd955442240de7fba46aa (patch) | |
tree | b9a8876cfaf5d86fc7a04be1560b2503d2a71969 /drivers | |
parent | 4dfddf503670d8def0fddb497e628130fc4522a8 (diff) | |
parent | bb086a89a406b5d877ee616f1490fcc81f8e1b2b (diff) | |
download | linux-c23112e0395a89c8a52cd955442240de7fba46aa.tar.xz |
Merge tag 'md/4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
"This update includes:
- new AVX512 instruction based raid6 gen/recovery algorithm
- a couple of md-cluster related bug fixes
- fix a potential deadlock
- set nonrotational bit for raid array with SSD
- set correct max_hw_sectors for raid5/6, which hopefuly can improve
performance a little bit
- other minor fixes"
* tag 'md/4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
md: set rotational bit
raid6/test/test.c: bug fix: Specify aligned(alignment) attributes to the char arrays
raid5: handle register_shrinker failure
raid5: fix to detect failure of register_shrinker
md: fix a potential deadlock
md/bitmap: fix wrong cleanup
raid5: allow arbitrary max_hw_sectors
lib/raid6: Add AVX512 optimized xor_syndrome functions
lib/raid6/test/Makefile: Add avx512 gen_syndrome and recovery functions
lib/raid6: Add AVX512 optimized recovery functions
lib/raid6: Add AVX512 optimized gen_syndrome functions
md-cluster: make resync lock also could be interruptted
md-cluster: introduce dlm_lock_sync_interruptible to fix tasks hang
md-cluster: convert the completion to wait queue
md-cluster: protect md_find_rdev_nr_rcu with rcu lock
md-cluster: clean related infos of cluster
md: changes for MD_STILL_CLOSED flag
md-cluster: remove some unnecessary dlm_unlock_sync
md-cluster: use FORCEUNLOCK in lockres_free
md-cluster: call md_kick_rdev_from_array once ack failed
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/bitmap.c | 4 | ||||
-rw-r--r-- | drivers/md/md-cluster.c | 99 | ||||
-rw-r--r-- | drivers/md/md.c | 44 | ||||
-rw-r--r-- | drivers/md/md.h | 5 | ||||
-rw-r--r-- | drivers/md/raid5.c | 11 |
5 files changed, 116 insertions, 47 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 13041ee37ad6..2d826927a3bf 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1903,10 +1903,8 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot, struct bitmap_counts *counts; struct bitmap *bitmap = bitmap_create(mddev, slot); - if (IS_ERR(bitmap)) { - bitmap_free(bitmap); + if (IS_ERR(bitmap)) return PTR_ERR(bitmap); - } rv = bitmap_init_from_disk(bitmap, 0); if (rv) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 34a840d9df76..2b13117fb918 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -10,6 +10,7 @@ #include <linux/module.h> +#include <linux/kthread.h> #include <linux/dlm.h> #include <linux/sched.h> #include <linux/raid/md_p.h> @@ -25,7 +26,8 @@ struct dlm_lock_resource { struct dlm_lksb lksb; char *name; /* lock name. */ uint32_t flags; /* flags to pass to dlm_lock() */ - struct completion completion; /* completion for synchronized locking */ + wait_queue_head_t sync_locking; /* wait queue for synchronized locking */ + bool sync_locking_done; void (*bast)(void *arg, int mode); /* blocking AST function pointer*/ struct mddev *mddev; /* pointing back to mddev. */ int mode; @@ -118,7 +120,8 @@ static void sync_ast(void *arg) struct dlm_lock_resource *res; res = arg; - complete(&res->completion); + res->sync_locking_done = true; + wake_up(&res->sync_locking); } static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) @@ -130,7 +133,8 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) 0, sync_ast, res, res->bast); if (ret) return ret; - wait_for_completion(&res->completion); + wait_event(res->sync_locking, res->sync_locking_done); + res->sync_locking_done = false; if (res->lksb.sb_status == 0) res->mode = mode; return res->lksb.sb_status; @@ -141,6 +145,44 @@ static int dlm_unlock_sync(struct dlm_lock_resource *res) return dlm_lock_sync(res, DLM_LOCK_NL); } +/* + * An variation of dlm_lock_sync, which make lock request could + * be interrupted + */ +static int dlm_lock_sync_interruptible(struct dlm_lock_resource *res, int mode, + struct mddev *mddev) +{ + int ret = 0; + + ret = dlm_lock(res->ls, mode, &res->lksb, + res->flags, res->name, strlen(res->name), + 0, sync_ast, res, res->bast); + if (ret) + return ret; + + wait_event(res->sync_locking, res->sync_locking_done + || kthread_should_stop() + || test_bit(MD_CLOSING, &mddev->flags)); + if (!res->sync_locking_done) { + /* + * the convert queue contains the lock request when request is + * interrupted, and sync_ast could still be run, so need to + * cancel the request and reset completion + */ + ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_CANCEL, + &res->lksb, res); + res->sync_locking_done = false; + if (unlikely(ret != 0)) + pr_info("failed to cancel previous lock request " + "%s return %d\n", res->name, ret); + return -EPERM; + } else + res->sync_locking_done = false; + if (res->lksb.sb_status == 0) + res->mode = mode; + return res->lksb.sb_status; +} + static struct dlm_lock_resource *lockres_init(struct mddev *mddev, char *name, void (*bastfn)(void *arg, int mode), int with_lvb) { @@ -151,7 +193,8 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev, res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL); if (!res) return NULL; - init_completion(&res->completion); + init_waitqueue_head(&res->sync_locking); + res->sync_locking_done = false; res->ls = cinfo->lockspace; res->mddev = mddev; res->mode = DLM_LOCK_IV; @@ -194,25 +237,21 @@ out_err: static void lockres_free(struct dlm_lock_resource *res) { - int ret; + int ret = 0; if (!res) return; - /* cancel a lock request or a conversion request that is blocked */ - res->flags |= DLM_LKF_CANCEL; -retry: - ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res); - if (unlikely(ret != 0)) { - pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret); - - /* if a lock conversion is cancelled, then the lock is put - * back to grant queue, need to ensure it is unlocked */ - if (ret == -DLM_ECANCEL) - goto retry; - } - res->flags &= ~DLM_LKF_CANCEL; - wait_for_completion(&res->completion); + /* + * use FORCEUNLOCK flag, so we can unlock even the lock is on the + * waiting or convert queue + */ + ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_FORCEUNLOCK, + &res->lksb, res); + if (unlikely(ret != 0)) + pr_err("failed to unlock %s return %d\n", res->name, ret); + else + wait_event(res->sync_locking, res->sync_locking_done); kfree(res->name); kfree(res->lksb.sb_lvbptr); @@ -279,7 +318,7 @@ static void recover_bitmaps(struct md_thread *thread) goto clear_bit; } - ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); + ret = dlm_lock_sync_interruptible(bm_lockres, DLM_LOCK_PW, mddev); if (ret) { pr_err("md-cluster: Could not DLM lock %s: %d\n", str, ret); @@ -288,7 +327,7 @@ static void recover_bitmaps(struct md_thread *thread) ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true); if (ret) { pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); - goto dlm_unlock; + goto clear_bit; } if (hi > 0) { if (lo < mddev->recovery_cp) @@ -300,8 +339,6 @@ static void recover_bitmaps(struct md_thread *thread) md_wakeup_thread(mddev->thread); } } -dlm_unlock: - dlm_unlock_sync(bm_lockres); clear_bit: lockres_free(bm_lockres); clear_bit(slot, &cinfo->recovery_map); @@ -495,9 +532,10 @@ static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg) { - struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, - le32_to_cpu(msg->raid_slot)); + struct md_rdev *rdev; + rcu_read_lock(); + rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot)); if (rdev) { set_bit(ClusterRemove, &rdev->flags); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -506,18 +544,21 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg) else pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, le32_to_cpu(msg->raid_slot)); + rcu_read_unlock(); } static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg) { - struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, - le32_to_cpu(msg->raid_slot)); + struct md_rdev *rdev; + rcu_read_lock(); + rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot)); if (rdev && test_bit(Faulty, &rdev->flags)) clear_bit(Faulty, &rdev->flags); else pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, le32_to_cpu(msg->raid_slot)); + rcu_read_unlock(); } static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) @@ -770,7 +811,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) md_check_recovery(mddev); } - dlm_unlock_sync(bm_lockres); lockres_free(bm_lockres); } out: @@ -1006,7 +1046,7 @@ static void metadata_update_cancel(struct mddev *mddev) static int resync_start(struct mddev *mddev) { struct md_cluster_info *cinfo = mddev->cluster_info; - return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX); + return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev); } static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) @@ -1186,7 +1226,6 @@ static void unlock_all_bitmaps(struct mddev *mddev) if (cinfo->other_bitmap_lockres) { for (i = 0; i < mddev->bitmap_info.nodes - 1; i++) { if (cinfo->other_bitmap_lockres[i]) { - dlm_unlock_sync(cinfo->other_bitmap_lockres[i]); lockres_free(cinfo->other_bitmap_lockres[i]); } } diff --git a/drivers/md/md.c b/drivers/md/md.c index 915e84d631a2..eac84d8ff724 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5297,6 +5297,21 @@ int md_run(struct mddev *mddev) return err; } if (mddev->queue) { + bool nonrot = true; + + rdev_for_each(rdev, mddev) { + if (rdev->raid_disk >= 0 && + !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) { + nonrot = false; + break; + } + } + if (mddev->degraded) + nonrot = false; + if (nonrot) + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue); + else + queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue); mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = md_congested; } @@ -5454,12 +5469,14 @@ static void md_clean(struct mddev *mddev) mddev->degraded = 0; mddev->safemode = 0; mddev->private = NULL; + mddev->cluster_info = NULL; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; mddev->bitmap_info.default_space = 0; mddev->bitmap_info.chunksize = 0; mddev->bitmap_info.daemon_sleep = 0; mddev->bitmap_info.max_write_behind = 0; + mddev->bitmap_info.nodes = 0; } static void __md_stop_writes(struct mddev *mddev) @@ -5573,8 +5590,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { printk("md: %s still in use.\n",mdname(mddev)); if (did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -5636,8 +5652,7 @@ static int do_md_stop(struct mddev *mddev, int mode, if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sysfs_active || mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { printk("md: %s still in use.\n",mdname(mddev)); mutex_unlock(&mddev->open_mutex); if (did_freeze) { @@ -6101,9 +6116,14 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) export_rdev(rdev); if (mddev_is_clustered(mddev)) { - if (info->state & (1 << MD_DISK_CANDIDATE)) - md_cluster_ops->new_disk_ack(mddev, (err == 0)); - else { + if (info->state & (1 << MD_DISK_CANDIDATE)) { + if (!err) { + err = md_cluster_ops->new_disk_ack(mddev, + err == 0); + if (err) + md_kick_rdev_from_array(rdev); + } + } else { if (err) md_cluster_ops->add_new_disk_cancel(mddev); else @@ -6821,7 +6841,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - set_bit(MD_STILL_CLOSED, &mddev->flags); + set_bit(MD_CLOSING, &mddev->flags); mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7070,9 +7090,13 @@ static int md_open(struct block_device *bdev, fmode_t mode) if ((err = mutex_lock_interruptible(&mddev->open_mutex))) goto out; + if (test_bit(MD_CLOSING, &mddev->flags)) { + mutex_unlock(&mddev->open_mutex); + return -ENODEV; + } + err = 0; atomic_inc(&mddev->openers); - clear_bit(MD_STILL_CLOSED, &mddev->flags); mutex_unlock(&mddev->open_mutex); check_disk_change(bdev); @@ -8873,7 +8897,9 @@ static void autostart_arrays(int part) list_del(&node_detected_dev->list); dev = node_detected_dev->dev; kfree(node_detected_dev); + mutex_unlock(&detected_devices_mutex); rdev = md_import_device(dev,0, 90); + mutex_lock(&detected_devices_mutex); if (IS_ERR(rdev)) continue; diff --git a/drivers/md/md.h b/drivers/md/md.h index 20c667579ede..2b2041773e79 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -201,9 +201,8 @@ struct mddev { #define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */ #define MD_UPDATE_SB_FLAGS (1 | 2 | 4) /* If these are set, md_update_sb needed */ #define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */ -#define MD_STILL_CLOSED 4 /* If set, then array has not been opened since - * md_ioctl checked on it. - */ +#define MD_CLOSING 4 /* If set, we are closing the array, do not open + * it then */ #define MD_JOURNAL_CLEAN 5 /* A raid with journal is already clean */ #define MD_HAS_JOURNAL 6 /* The raid array has journal feature set */ #define MD_RELOAD_SB 7 /* Reload the superblock because another node diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5287e79e0b78..92ac251e91e6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6370,7 +6370,7 @@ static void free_conf(struct r5conf *conf) { if (conf->log) r5l_exit_log(conf->log); - if (conf->shrinker.seeks) + if (conf->shrinker.nr_deferred) unregister_shrinker(&conf->shrinker); free_thread_groups(conf); @@ -6632,7 +6632,12 @@ static struct r5conf *setup_conf(struct mddev *mddev) conf->shrinker.count_objects = raid5_cache_count; conf->shrinker.batch = 128; conf->shrinker.flags = 0; - register_shrinker(&conf->shrinker); + if (register_shrinker(&conf->shrinker)) { + printk(KERN_ERR + "md/raid:%s: couldn't register shrinker.\n", + mdname(mddev)); + goto abort; + } sprintf(pers_name, "raid%d", mddev->new_level); conf->thread = md_register_thread(raid5d, mddev, pers_name); @@ -7028,6 +7033,8 @@ static int raid5_run(struct mddev *mddev) else queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + + blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); } if (journal_dev) { |