summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Li Chen <me@linux.beauty> 2026-02-02 17:37:53 +0300
committer Jens Axboe <axboe@kernel.dk> 2026-02-02 18:10:23 +0300
commit 38aa434ab9335ce2d178b7538cdf01d60b2014c3 (patch)
tree b8e82b33e08362432b0b8eca07580e22050da37e
parent 806ae939c41e5da1d94a1e2b31f5702e96b6c3e3 (diff)
download linux-38aa434ab9335ce2d178b7538cdf01d60b2014c3.tar.xz
io_uring/io-wq: add exit-on-idle state
io-wq uses an idle timeout to shrink the pool, but keeps the last worker around indefinitely to avoid churn.

For tasks that used io_uring for file I/O and then stop using io_uring, this can leave an iou-wrk-* thread behind even after all io_uring instances are gone. This is unnecessary overhead and also gets in the way of process checkpoint/restore.

Add an exit-on-idle state that makes all io-wq workers exit as soon as they become idle, and provide io_wq_set_exit_on_idle() to toggle it.

Signed-off-by: Li Chen <me@linux.beauty>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- io_uring/io-wq.c | 27
-rw-r--r-- io_uring/io-wq.h | 1
2 files changed, 26 insertions, 2 deletions
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 3b55feb620d9..f0cec1a6d9c4 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -35,6 +35,7 @@ enum {
enum {
IO_WQ_BIT_EXIT = 0, /* wq exiting */
+ IO_WQ_BIT_EXIT_ON_IDLE = 1, /* allow all workers to exit on idle */
};
enum {
@@ -707,9 +708,13 @@ static int io_wq_worker(void *data)
raw_spin_lock(&acct->workers_lock);
/*
* Last sleep timed out. Exit if we're not the last worker,
- * or if someone modified our affinity.
+ * or if someone modified our affinity. If wq is marked
+ * idle-exit, drop the worker as well. This is used to avoid
+ * keeping io-wq workers around for tasks that no longer have
+ * any active io_uring instances.
*/
- if (last_timeout && (exit_mask || acct->nr_workers > 1)) {
+ if ((last_timeout && (exit_mask || acct->nr_workers > 1)) ||
+ test_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state)) {
acct->nr_workers--;
raw_spin_unlock(&acct->workers_lock);
__set_current_state(TASK_RUNNING);
@@ -967,6 +972,24 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
return false;
}
+void io_wq_set_exit_on_idle(struct io_wq *wq, bool enable)
+{
+ if (!wq->task)
+ return;
+
+ if (!enable) {
+ clear_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state);
+ return;
+ }
+
+ if (test_and_set_bit(IO_WQ_BIT_EXIT_ON_IDLE, &wq->state))
+ return;
+
+ rcu_read_lock();
+ io_wq_for_each_worker(wq, io_wq_worker_wake, NULL);
+ rcu_read_unlock();
+}
+
static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
{
do {
diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h
index 774abab54732..94b14742b703 100644
--- a/io_uring/io-wq.h
+++ b/io_uring/io-wq.h
@@ -41,6 +41,7 @@ struct io_wq_data {
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
+void io_wq_set_exit_on_idle(struct io_wq *wq, bool enable);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);