path: root/block/blk-mq.c
author     Ming Lei <ming.lei@redhat.com>                       2024-10-25 03:37:20 +0300
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>      2024-12-05 16:03:05 +0300
commit     a6fc2ba1c7e57cc4ef5a36f8fe56d810fd7b1a65 (patch)
tree       a6758f82d8686a48930f537e9212a20f0adf4060 /block/blk-mq.c
parent     9c3d53f113426d2f0f49ca13afabca13f5b1a5af (diff)
block: model freeze & enter queue as lock for supporting lockdep
[ Upstream commit f1be1788a32e8fa63416ad4518bbd1a85a825c9d ]

Recently we got several deadlock reports [1][2][3] caused by
blk_mq_freeze_queue() and blk_enter_queue(). It turns out the two behave
just like acquiring a write lock and a read lock, so model them as a
read/write lock for lockdep support:

1) model q->q_usage_counter as two locks (io lock and queue lock)

- the queue lock covers synchronization with blk_enter_queue()
- the io lock covers synchronization with bio_enter_queue()

2) make the lockdep class/key per-queue

- different subsystems have very different lock-use patterns; a shared
  lock class easily causes false positives
- freeze_queue degrades to no lock once the disk state becomes DEAD,
  because bio_enter_queue() won't be blocked any more
- freeze_queue degrades to no lock once the request queue becomes
  dying, because blk_enter_queue() won't be blocked any more

3) model blk_mq_freeze_queue() as acquire_exclusive & trylock

- it is an exclusive lock, so the dependency with blk_enter_queue() is
  covered
- it is a trylock, because blk_mq_freeze_queue() calls are allowed to
  run concurrently

4) model blk_enter_queue() & bio_enter_queue() as acquire_read()

- nested blk_enter_queue() calls are allowed
- the dependency with blk_mq_freeze_queue() is covered
- blk_queue_exit() is often called from other contexts (such as irq)
  and can't be annotated as lock_release(), so simply release the
  annotation in blk_enter_queue() itself; this still covers as many
  cases as possible

With lockdep support, such problems can be reported as soon as the
inverted dependency is exercised, without waiting for the real deadlock
to trigger. For example, a lockdep report is raised for report [3] with
this patch applied.

[1] occasional block layer hang when setting 'echo noop >
/sys/block/sda/queue/scheduler'
https://bugzilla.kernel.org/show_bug.cgi?id=219166

[2] del_gendisk() vs blk_queue_enter() race condition
https://lore.kernel.org/linux-block/20241003085610.GK11458@google.com/

[3] queue_freeze & queue_enter deadlock in scsi
https://lore.kernel.org/linux-block/ZxG38G9BuFdBpBHZ@fedora/T/#u

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20241025003722.3630252-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Stable-dep-of: 3802f73bd807 ("block: fix uaf for flush rq while iterating tags")
Signed-off-by: Sasha Levin <sashal@kernel.org>
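The hunks below call blk_freeze_acquire_lock() and blk_unfreeze_release_lock(),
which this commit introduces in block/blk.h rather than in this file. As a
rough sketch of what those helpers do, based on the upstream commit (the
per-queue lockdep maps io_lockdep_map and q_lockdep_map come from the same
patch; the exact guard logic may differ from this sketch):

/* Lockdep-only annotation helpers: rwsem_acquire()/rwsem_release() are
 * the standard annotation macros from <linux/lockdep.h>. trylock=1
 * encodes that concurrent freezes are allowed (see point 3 above).
 */
static inline void blk_freeze_acquire_lock(struct request_queue *q,
                                           bool disk_dead, bool queue_dying)
{
        /* the io lock degrades to no lock once the disk is DEAD */
        if (!disk_dead)
                rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_);
        /* the queue lock degrades to no lock once the queue is dying */
        if (!queue_dying)
                rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_);
}

static inline void blk_unfreeze_release_lock(struct request_queue *q,
                                             bool disk_dead, bool queue_dying)
{
        if (!queue_dying)
                rwsem_release(&q->q_lockdep_map, _RET_IP_);
        if (!disk_dead)
                rwsem_release(&q->io_lockdep_map, _RET_IP_);
}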
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--    block/blk-mq.c    26
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0beed0ae0aeb..a111c5928b79 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -120,17 +120,29 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
 	inflight[1] = mi.inflight[1];
 }
 
-void blk_freeze_queue_start(struct request_queue *q)
+bool __blk_freeze_queue_start(struct request_queue *q)
 {
+	int freeze;
+
 	mutex_lock(&q->mq_freeze_lock);
 	if (++q->mq_freeze_depth == 1) {
 		percpu_ref_kill(&q->q_usage_counter);
 		mutex_unlock(&q->mq_freeze_lock);
 		if (queue_is_mq(q))
 			blk_mq_run_hw_queues(q, false);
+		freeze = true;
 	} else {
 		mutex_unlock(&q->mq_freeze_lock);
+		freeze = false;
 	}
+
+	return freeze;
+}
+
+void blk_freeze_queue_start(struct request_queue *q)
+{
+	if (__blk_freeze_queue_start(q))
+		blk_freeze_acquire_lock(q, false, false);
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
@@ -176,8 +188,10 @@ void blk_mq_freeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
 
-void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
+bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 {
+	int unfreeze = false;
+
 	mutex_lock(&q->mq_freeze_lock);
 	if (force_atomic)
 		q->q_usage_counter.data->force_atomic = true;
@@ -186,13 +200,17 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
 	if (!q->mq_freeze_depth) {
 		percpu_ref_resurrect(&q->q_usage_counter);
 		wake_up_all(&q->mq_freeze_wq);
+		unfreeze = true;
 	}
 	mutex_unlock(&q->mq_freeze_lock);
+
+	return unfreeze;
 }
 
 void blk_mq_unfreeze_queue(struct request_queue *q)
 {
-	__blk_mq_unfreeze_queue(q, false);
+	if (__blk_mq_unfreeze_queue(q, false))
+		blk_unfreeze_release_lock(q, false, false);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
@@ -205,7 +223,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  */
 void blk_freeze_queue_start_non_owner(struct request_queue *q)
 {
-	blk_freeze_queue_start(q);
+	__blk_freeze_queue_start(q);
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start_non_owner);
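The read side is annotated in block/blk-core.c by the same upstream commit
and is not part of this file's diff. The pattern, per point 4 of the commit
message, is a read acquisition followed by an immediate release inside the
enter path, because blk_queue_exit() can run from irq context and cannot
carry the release annotation itself. A hedged sketch of that pattern (the
surrounding wait-for-unfreeze logic is elided):

int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
        /* ... existing logic that waits for the queue to unfreeze ... */

        /* Record the reader->freezer dependency for lockdep, then drop
         * the annotation right away; nested enters stay allowed because
         * this is a read acquisition.
         */
        rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_);
        rwsem_release(&q->q_lockdep_map, _RET_IP_);
        return 0;
}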