From f381c6a4bd0ae0fde2d6340f1b9bb0f58d915de6 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 20 May 2019 19:23:56 +0200 Subject: bio: fix improper use of smp_mb__before_atomic() This barrier only applies to the read-modify-write operations; in particular, it does not apply to the atomic_set() primitive. Replace the barrier with an smp_mb(). Fixes: dac56212e8127 ("bio: skip atomic inc/dec of ->bi_cnt for most use cases") Cc: stable@vger.kernel.org Reported-by: "Paul E. McKenney" Reported-by: Peter Zijlstra Signed-off-by: Andrea Parri Reviewed-by: Ming Lei Cc: Jens Axboe Cc: Ming Lei Cc: linux-block@vger.kernel.org Cc: "Paul E. McKenney" Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index ea73df36529a..0f23b5682640 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -210,7 +210,7 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) { if (count != 1) { bio->bi_flags |= (1 << BIO_REFFED); - smp_mb__before_atomic(); + smp_mb(); } atomic_set(&bio->__bi_cnt, count); } -- cgit v1.2.3 From 6869875fbc04042ad01654591da60862706e86e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 May 2019 09:01:43 +0200 Subject: block: remove the bi_seg_{front,back}_size fields in struct bio At this point these fields aren't used for anything, so we can remove them. Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-merge.c | 94 ++++++----------------------------------------- include/linux/blk_types.h | 7 ---- 2 files changed, 12 insertions(+), 89 deletions(-) (limited to 'include') diff --git a/block/blk-merge.c b/block/blk-merge.c index eee2c02c50ce..17713d7d98d5 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -162,8 +162,7 @@ static unsigned get_max_segment_size(struct request_queue *q, * variables. */ static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv, - unsigned *nsegs, unsigned *last_seg_size, - unsigned *front_seg_size, unsigned *sectors, unsigned max_segs) + unsigned *nsegs, unsigned *sectors, unsigned max_segs) { unsigned len = bv->bv_len; unsigned total_len = 0; @@ -185,28 +184,12 @@ static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv, break; } - if (!new_nsegs) - return !!len; - - /* update front segment size */ - if (!*nsegs) { - unsigned first_seg_size; - - if (new_nsegs == 1) - first_seg_size = get_max_segment_size(q, bv->bv_offset); - else - first_seg_size = queue_max_segment_size(q); - - if (*front_seg_size < first_seg_size) - *front_seg_size = first_seg_size; + if (new_nsegs) { + *nsegs += new_nsegs; + if (sectors) + *sectors += total_len >> 9; } - /* update other varibles */ - *last_seg_size = seg_size; - *nsegs += new_nsegs; - if (sectors) - *sectors += total_len >> 9; - /* split in the middle of the bvec if len != 0 */ return !!len; } @@ -218,8 +201,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, { struct bio_vec bv, bvprv, *bvprvp = NULL; struct bvec_iter iter; - unsigned seg_size = 0, nsegs = 0, sectors = 0; - unsigned front_seg_size = bio->bi_seg_front_size; + unsigned nsegs = 0, sectors = 0; bool do_split = true; struct bio *new = NULL; const unsigned max_sectors = get_max_io_size(q, bio); @@ -243,8 +225,6 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, /* split in the middle of bvec */ bv.bv_len = (max_sectors - sectors) << 9; bvec_split_segs(q, &bv, &nsegs, - &seg_size, - &front_seg_size, §ors, max_segs); } goto split; @@ -258,12 +238,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, if (bv.bv_offset + bv.bv_len <= PAGE_SIZE) { nsegs++; - seg_size = bv.bv_len; sectors += bv.bv_len >> 9; - if (nsegs == 1 && seg_size > front_seg_size) - front_seg_size = seg_size; - } else if (bvec_split_segs(q, &bv, &nsegs, &seg_size, - &front_seg_size, §ors, max_segs)) { + } else if (bvec_split_segs(q, &bv, &nsegs, §ors, + max_segs)) { goto split; } } @@ -278,10 +255,6 @@ split: bio = new; } - bio->bi_seg_front_size = front_seg_size; - if (seg_size > bio->bi_seg_back_size) - bio->bi_seg_back_size = seg_size; - return do_split ? new : NULL; } @@ -336,17 +309,13 @@ EXPORT_SYMBOL(blk_queue_split); static unsigned int __blk_recalc_rq_segments(struct request_queue *q, struct bio *bio) { - struct bio_vec uninitialized_var(bv), bvprv = { NULL }; - unsigned int seg_size, nr_phys_segs; - unsigned front_seg_size; - struct bio *fbio, *bbio; + unsigned int nr_phys_segs = 0; struct bvec_iter iter; + struct bio_vec bv; if (!bio) return 0; - front_seg_size = bio->bi_seg_front_size; - switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: @@ -356,23 +325,11 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, return 1; } - fbio = bio; - seg_size = 0; - nr_phys_segs = 0; for_each_bio(bio) { - bio_for_each_bvec(bv, bio, iter) { - bvec_split_segs(q, &bv, &nr_phys_segs, &seg_size, - &front_seg_size, NULL, UINT_MAX); - } - bbio = bio; - if (likely(bio->bi_iter.bi_size)) - bvprv = bv; + bio_for_each_bvec(bv, bio, iter) + bvec_split_segs(q, &bv, &nr_phys_segs, NULL, UINT_MAX); } - fbio->bi_seg_front_size = front_seg_size; - if (seg_size > bbio->bi_seg_back_size) - bbio->bi_seg_back_size = seg_size; - return nr_phys_segs; } @@ -392,24 +349,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio) bio_set_flag(bio, BIO_SEG_VALID); } -static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, - struct bio *nxt) -{ - struct bio_vec end_bv = { NULL }, nxt_bv; - - if (bio->bi_seg_back_size + nxt->bi_seg_front_size > - queue_max_segment_size(q)) - return 0; - - if (!bio_has_data(bio)) - return 1; - - bio_get_last_bvec(bio, &end_bv); - bio_get_first_bvec(nxt, &nxt_bv); - - return biovec_phys_mergeable(q, &end_bv, &nxt_bv); -} - static inline struct scatterlist *blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist) { @@ -669,8 +608,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { int total_phys_segments; - unsigned int seg_size = - req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; if (req_gap_back_merge(req, next->bio)) return 0; @@ -683,13 +620,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, return 0; total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; - if (blk_phys_contig_segment(q, req->biotail, next->bio)) { - if (req->nr_phys_segments == 1) - req->bio->bi_seg_front_size = seg_size; - if (next->nr_phys_segments == 1) - next->biotail->bi_seg_back_size = seg_size; - } - if (total_phys_segments > queue_max_segments(q)) return 0; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index be418275763c..95202f80676c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -159,13 +159,6 @@ struct bio { */ unsigned int bi_phys_segments; - /* - * To keep track of the max segment size, we account for the - * sizes of the first and last mergeable segments in this bio. - */ - unsigned int bi_seg_front_size; - unsigned int bi_seg_back_size; - struct bvec_iter bi_iter; atomic_t __bi_remaining; -- cgit v1.2.3 From 7996a8b5511a72465b0b286763c2d8f412b8874a Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Tue, 21 May 2019 11:25:55 +0800 Subject: blk-mq: fix hang caused by freeze/unfreeze sequence The following is a description of a hang in blk_mq_freeze_queue_wait(). The hang happens on attempt to freeze a queue while another task does queue unfreeze. The root cause is an incorrect sequence of percpu_ref_resurrect() and percpu_ref_kill() and as a result those two can be swapped: CPU#0 CPU#1 ---------------- ----------------- q1 = blk_mq_init_queue(shared_tags) q2 = blk_mq_init_queue(shared_tags): blk_mq_add_queue_tag_set(shared_tags): blk_mq_update_tag_set_depth(shared_tags): list_for_each_entry() blk_mq_freeze_queue(q1) > percpu_ref_kill() > blk_mq_freeze_queue_wait() blk_cleanup_queue(q1) blk_mq_freeze_queue(q1) > percpu_ref_kill() ^^^^^^ freeze_depth can't guarantee the order blk_mq_unfreeze_queue() > percpu_ref_resurrect() > blk_mq_freeze_queue_wait() ^^^^^^ Hang here!!!! This wrong sequence raises kernel warning: percpu_ref_kill_and_confirm called more than once on blk_queue_usage_counter_release! WARNING: CPU: 0 PID: 11854 at lib/percpu-refcount.c:336 percpu_ref_kill_and_confirm+0x99/0xb0 But the most unpleasant effect is a hang of a blk_mq_freeze_queue_wait(), which waits for a zero of a q_usage_counter, which never happens because percpu-ref was reinited (instead of being killed) and stays in PERCPU state forever. How to reproduce: - "insmod null_blk.ko shared_tags=1 nr_devices=0 queue_mode=2" - cpu0: python Script.py 0; taskset the corresponding process running on cpu0 - cpu1: python Script.py 1; taskset the corresponding process running on cpu1 Script.py: ------ #!/usr/bin/python3 import os import sys while True: on = "echo 1 > /sys/kernel/config/nullb/%s/power" % sys.argv[1] off = "echo 0 > /sys/kernel/config/nullb/%s/power" % sys.argv[1] os.system(on) os.system(off) ------ This bug was first reported and fixed by Roman, previous discussion: [1] Message id: 1443287365-4244-7-git-send-email-akinobu.mita@gmail.com [2] Message id: 1443563240-29306-6-git-send-email-tj@kernel.org [3] https://patchwork.kernel.org/patch/9268199/ Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Roman Pen Signed-off-by: Bob Liu Signed-off-by: Jens Axboe --- block/blk-core.c | 3 ++- block/blk-mq.c | 19 ++++++++++--------- include/linux/blkdev.h | 7 ++++++- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 419d600e6637..1bf83a0df0f6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -413,7 +413,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) smp_rmb(); wait_event(q->mq_freeze_wq, - (atomic_read(&q->mq_freeze_depth) == 0 && + (!q->mq_freeze_depth && (pm || (blk_pm_request_resume(q), !blk_queue_pm_only(q)))) || blk_queue_dying(q)); @@ -503,6 +503,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) spin_lock_init(&q->queue_lock); init_waitqueue_head(&q->mq_freeze_wq); + mutex_init(&q->mq_freeze_lock); /* * Init percpu_ref in atomic mode so that it's faster to shutdown. diff --git a/block/blk-mq.c b/block/blk-mq.c index 08a6248d8536..32b8ad3d341b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -144,13 +144,14 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, void blk_freeze_queue_start(struct request_queue *q) { - int freeze_depth; - - freeze_depth = atomic_inc_return(&q->mq_freeze_depth); - if (freeze_depth == 1) { + mutex_lock(&q->mq_freeze_lock); + if (++q->mq_freeze_depth == 1) { percpu_ref_kill(&q->q_usage_counter); + mutex_unlock(&q->mq_freeze_lock); if (queue_is_mq(q)) blk_mq_run_hw_queues(q, false); + } else { + mutex_unlock(&q->mq_freeze_lock); } } EXPORT_SYMBOL_GPL(blk_freeze_queue_start); @@ -199,14 +200,14 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); void blk_mq_unfreeze_queue(struct request_queue *q) { - int freeze_depth; - - freeze_depth = atomic_dec_return(&q->mq_freeze_depth); - WARN_ON_ONCE(freeze_depth < 0); - if (!freeze_depth) { + mutex_lock(&q->mq_freeze_lock); + q->mq_freeze_depth--; + WARN_ON_ONCE(q->mq_freeze_depth < 0); + if (!q->mq_freeze_depth) { percpu_ref_resurrect(&q->q_usage_counter); wake_up_all(&q->mq_freeze_wq); } + mutex_unlock(&q->mq_freeze_lock); } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1aafeb923e7b..592669bcc536 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -542,7 +542,7 @@ struct request_queue { struct list_head unused_hctx_list; spinlock_t unused_hctx_lock; - atomic_t mq_freeze_depth; + int mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; @@ -554,6 +554,11 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; + /* + * Protect concurrent access to q_usage_counter by + * percpu_ref_kill() and percpu_ref_reinit(). + */ + struct mutex mq_freeze_lock; struct percpu_ref q_usage_counter; struct blk_mq_tag_set *tag_set; -- cgit v1.2.3