diff options
| author | Tejun Heo <tj@kernel.org> | 2026-04-21 09:03:26 +0300 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2026-04-21 09:10:50 +0300 |
| commit | 4fe985292709eeb6a4653c71660f893e26c2f2dd (patch) | |
| tree | 4fa69c4095a04f119e6919dca823e10756110891 /include/linux | |
| parent | 5897ca15d2c444af95eaae5f0a384401765afa00 (diff) | |
| download | linux-4fe985292709eeb6a4653c71660f893e26c2f2dd.tar.xz | |
rhashtable: Bounce deferred worker kick through irq_work
Inserts past 75% load call schedule_work(&ht->run_work) to kick an
async resize. If a caller holds a raw spinlock (e.g. an
insecure_elasticity user), schedule_work() under that lock records the
lock dependency chain:
    caller_lock -> pool->lock -> pi_lock -> rq->__lock
A cycle forms if any of these locks is acquired in the reverse
direction elsewhere. sched_ext, the only current insecure_elasticity
user, hits this: it holds scx_sched_lock across rhashtable inserts of
sub-schedulers, while scx_bypass() takes rq->__lock -> scx_sched_lock.
Exercising the resize path produces:
    Chain exists of:
      &pool->lock --> &rq->__lock --> scx_sched_lock
Bounce the kick from the insert paths through irq_work so
schedule_work() runs from hard IRQ context with the caller's lock no
longer held. rht_deferred_worker()'s self-rearm on error stays on
schedule_work(&ht->run_work) - the worker runs in process context with
no caller lock held, and keeping the self-requeue on @run_work lets
cancel_work_sync() in rhashtable_free_and_destroy() drain it.
v3: Keep rht_deferred_worker()'s self-rearm on schedule_work(&run_work).
Routing it through irq_work in v2 broke cancel_work_sync()'s
self-requeue handling - an irq_work queued after irq_work_sync()
returned but while cancel_work_sync() was still waiting could fire
post-teardown.
v2: Bounce unconditionally instead of gating on insecure_elasticity,
as suggested by Herbert.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/rhashtable-types.h | 3 | ||||
| -rw-r--r-- | include/linux/rhashtable.h | 3 |
2 files changed, 5 insertions, 1 deletions
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 72082428d6c6..fc2f596a6df1 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -12,6 +12,7 @@
 #include <linux/alloc_tag.h>
 #include <linux/atomic.h>
 #include <linux/compiler.h>
+#include <linux/irq_work_types.h>
 #include <linux/mutex.h>
 #include <linux/workqueue_types.h>
 
@@ -77,6 +78,7 @@ struct rhashtable_params {
  * @p: Configuration parameters
  * @rhlist: True if this is an rhltable
  * @run_work: Deferred worker to expand/shrink asynchronously
+ * @run_irq_work: Bounces the @run_work kick through hard IRQ context.
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
  * @nelems: Number of elements in table
@@ -88,6 +90,7 @@ struct rhashtable {
 	struct rhashtable_params p;
 	bool rhlist;
 	struct work_struct run_work;
+	struct irq_work run_irq_work;
 	struct mutex mutex;
 	spinlock_t lock;
 	atomic_t nelems;
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 7def3f0f556b..ef5230cece36 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -20,6 +20,7 @@
 
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/irq_work.h>
 #include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
@@ -847,7 +848,7 @@ slow_path:
 	rht_assign_unlock(tbl, bkt, obj, flags);
 
 	if (rht_grow_above_75(ht, tbl))
-		schedule_work(&ht->run_work);
+		irq_work_queue(&ht->run_irq_work);
 
 	data = NULL;
 out:
