diff options
author | Guoqing Jiang <gqjiang@suse.com> | 2016-08-12 08:42:42 +0300 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2016-09-21 19:09:44 +0300 |
commit | 7bcda7149dd6911bee15a51b22477f592f8b9620 (patch) | |
tree | 4ccd61106770c829e87222e6b486f5dd6b6a0f22 /drivers/md/md-cluster.c | |
parent | fccb60a42cdd863aa80f32214ae58ae13936c927 (diff) | |
download | linux-7bcda7149dd6911bee15a51b22477f592f8b9620.tar.xz |
md-cluster: introduce dlm_lock_sync_interruptible to fix tasks hang
When some node leaves cluster, then it's bitmap need to be
synced by another node, so "md*_recover" thread is triggered
for the purpose. However, with below steps. we can find tasks
hang happened either in B or C.
1. Node A create a resyncing cluster raid1, assemble it in
other two nodes (B and C).
2. stop array in B and C.
3. stop array in A.
linux44:~ # ps aux|grep md|grep D
root 5938 0.0 0.1 19852 1964 pts/0 D+ 14:52 0:00 mdadm -S md0
root 5939 0.0 0.0 0 0 ? D 14:52 0:00 [md0_recover]
linux44:~ # cat /proc/5939/stack
[<ffffffffa04cf321>] dlm_lock_sync+0x71/0x90 [md_cluster]
[<ffffffffa04d0705>] recover_bitmaps+0x125/0x220 [md_cluster]
[<ffffffffa052105d>] md_thread+0x16d/0x180 [md_mod]
[<ffffffff8107ad94>] kthread+0xb4/0xc0
[<ffffffff8152a518>] ret_from_fork+0x58/0x90
linux44:~ # cat /proc/5938/stack
[<ffffffff8107afde>] kthread_stop+0x6e/0x120
[<ffffffffa0519da0>] md_unregister_thread+0x40/0x80 [md_mod]
[<ffffffffa04cfd20>] leave+0x70/0x120 [md_cluster]
[<ffffffffa0525e24>] md_cluster_stop+0x14/0x30 [md_mod]
[<ffffffffa05269ab>] bitmap_free+0x14b/0x150 [md_mod]
[<ffffffffa0523f3b>] do_md_stop+0x35b/0x5a0 [md_mod]
[<ffffffffa0524e83>] md_ioctl+0x873/0x1590 [md_mod]
[<ffffffff81288464>] blkdev_ioctl+0x214/0x7d0
[<ffffffff811dd3dd>] block_ioctl+0x3d/0x40
[<ffffffff811b92d4>] do_vfs_ioctl+0x2d4/0x4b0
[<ffffffff811b9538>] SyS_ioctl+0x88/0xa0
[<ffffffff8152a5c9>] system_call_fastpath+0x16/0x1b
The problem is caused by recover_bitmaps can't reliably abort
when the thread is unregistered. So dlm_lock_sync_interruptible
is introduced to detect the thread's situation to fix the problem.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/md-cluster.c')
-rw-r--r-- | drivers/md/md-cluster.c | 40 |
1 files changed, 39 insertions, 1 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index c94715159b9b..43b90485448d 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -10,6 +10,7 @@ #include <linux/module.h> +#include <linux/kthread.h> #include <linux/dlm.h> #include <linux/sched.h> #include <linux/raid/md_p.h> @@ -144,6 +145,43 @@ static int dlm_unlock_sync(struct dlm_lock_resource *res) return dlm_lock_sync(res, DLM_LOCK_NL); } +/* + * An variation of dlm_lock_sync, which make lock request could + * be interrupted + */ +static int dlm_lock_sync_interruptible(struct dlm_lock_resource *res, int mode, + struct mddev *mddev) +{ + int ret = 0; + + ret = dlm_lock(res->ls, mode, &res->lksb, + res->flags, res->name, strlen(res->name), + 0, sync_ast, res, res->bast); + if (ret) + return ret; + + wait_event(res->sync_locking, res->sync_locking_done + || kthread_should_stop()); + if (!res->sync_locking_done) { + /* + * the convert queue contains the lock request when request is + * interrupted, and sync_ast could still be run, so need to + * cancel the request and reset completion + */ + ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_CANCEL, + &res->lksb, res); + res->sync_locking_done = false; + if (unlikely(ret != 0)) + pr_info("failed to cancel previous lock request " + "%s return %d\n", res->name, ret); + return -EPERM; + } else + res->sync_locking_done = false; + if (res->lksb.sb_status == 0) + res->mode = mode; + return res->lksb.sb_status; +} + static struct dlm_lock_resource *lockres_init(struct mddev *mddev, char *name, void (*bastfn)(void *arg, int mode), int with_lvb) { @@ -279,7 +317,7 @@ static void recover_bitmaps(struct md_thread *thread) goto clear_bit; } - ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); + ret = dlm_lock_sync_interruptible(bm_lockres, DLM_LOCK_PW, mddev); if (ret) { pr_err("md-cluster: Could not DLM lock %s: %d\n", str, ret); |