diff options
author | Yu Kuai <yukuai3@huawei.com> | 2023-01-19 14:03:50 +0300 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2023-03-10 11:29:05 +0300 |
commit | bfe46d2efe46c5c952f982e2ca94fe2ec5e58e2a (patch) | |
tree | b963962befc122ac02272b040dc2c8fdab667f1f /block | |
parent | 57a425badc05c2e87e9f25713e5c3c0298e4202c (diff) | |
download | linux-bfe46d2efe46c5c952f982e2ca94fe2ec5e58e2a.tar.xz |
blk-cgroup: synchronize pd_free_fn() from blkg_free_workfn() and blkcg_deactivate_policy()
[ Upstream commit f1c006f1c6850c14040f8337753a63119bba39b9 ]
Currently parent pd can be freed before child pd:
t1: remove cgroup C1
blkcg_destroy_blkgs
blkg_destroy
list_del_init(&blkg->q_node)
// remove blkg from queue list
percpu_ref_kill(&blkg->refcnt)
blkg_release
call_rcu
t2: from t1
__blkg_release
blkg_free
schedule_work
t4: deactivate policy
blkcg_deactivate_policy
pd_free_fn
// parent of C1 is freed first
t3: from t2
blkg_free_workfn
pd_free_fn
If policy(for example, ioc_timer_fn() from iocost) access parent pd from
child pd after pd_offline_fn(), then UAF can be triggered.
Fix the problem by delaying 'list_del_init(&blkg->q_node)' from
blkg_destroy() to blkg_free_workfn(), and using a new disk level mutex to
synchronize blkg_free_workfn() and blkcg_deactivate_policy().
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20230119110350.2287325-4-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-cgroup.c | 35 |
1 files changed, 29 insertions, 6 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index aa890e3e4e50..45881f8c7913 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -118,16 +118,32 @@ static void blkg_free_workfn(struct work_struct *work) { struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, free_work); + struct request_queue *q = blkg->q; int i; + /* + * pd_free_fn() can also be called from blkcg_deactivate_policy(), + * in order to make sure pd_free_fn() is called in order, the deletion + * of the list blkg->q_node is delayed to here from blkg_destroy(), and + * blkcg_mutex is used to synchronize blkg_free_workfn() and + * blkcg_deactivate_policy(). + */ + if (q) + mutex_lock(&q->blkcg_mutex); + for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->parent) blkg_put(blkg->parent); - if (blkg->q) - blk_put_queue(blkg->q); + + if (q) { + list_del_init(&blkg->q_node); + mutex_unlock(&q->blkcg_mutex); + blk_put_queue(q); + } + free_percpu(blkg->iostat_cpu); percpu_ref_exit(&blkg->refcnt); kfree(blkg); @@ -458,9 +474,14 @@ static void blkg_destroy(struct blkcg_gq *blkg) lockdep_assert_held(&blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); - /* Something wrong if we are trying to remove same group twice */ - WARN_ON_ONCE(list_empty(&blkg->q_node)); - WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + /* + * blkg stays on the queue list until blkg_free_workfn(), see details in + * blkg_free_workfn(), hence this function can be called from + * blkcg_destroy_blkgs() first and again from blkg_destroy_all() before + * blkg_free_workfn(). + */ + if (hlist_unhashed(&blkg->blkcg_node)) + return; for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -472,7 +493,6 @@ static void blkg_destroy(struct blkcg_gq *blkg) blkg->online = false; radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); - list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); /* @@ -1273,6 +1293,7 @@ int blkcg_init_disk(struct gendisk *disk) int ret; INIT_LIST_HEAD(&q->blkg_list); + mutex_init(&q->blkcg_mutex); new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL); if (!new_blkg) @@ -1510,6 +1531,7 @@ void blkcg_deactivate_policy(struct request_queue *q, if (queue_is_mq(q)) blk_mq_freeze_queue(q); + mutex_lock(&q->blkcg_mutex); spin_lock_irq(&q->queue_lock); __clear_bit(pol->plid, q->blkcg_pols); @@ -1528,6 +1550,7 @@ void blkcg_deactivate_policy(struct request_queue *q, } spin_unlock_irq(&q->queue_lock); + mutex_unlock(&q->blkcg_mutex); if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); |