summaryrefslogtreecommitdiff
path: root/io_uring/io_uring.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2025-12-11 13:25:41 +0300
committerJens Axboe <axboe@kernel.dk>2025-12-29 01:54:45 +0300
commit3c7d76d6128a0fef68e6540754bf85a44a29bb59 (patch)
tree1fed05d9c7c6ba8891b7a0517c725ea899be5844 /io_uring/io_uring.c
parentf8f9c1f4d0c7a64600e2ca312dec824a0bc2f1da (diff)
downloadlinux-3c7d76d6128a0fef68e6540754bf85a44a29bb59.tar.xz
io_uring: IOPOLL polling improvements
io_uring manages issued and pending IOPOLL read/write requests in a singly linked list. One downside of that is that individual items cannot easily be removed from that list, and as a result, io_uring will only complete a completed request N in that list if 0..N-1 are also complete. For homogenous IO this isn't necessarily an issue, but if different devices are involved in polling in the same ring, or if disparate IO from the same device is being polled for, this can defer completion of some requests unnecessarily. Move to a doubly linked list for iopoll completions instead, making it possible to easily complete whatever requests that were polled done successfully. Co-developed-by: Fengnan Chang <fengnanchang@gmail.com> Link: https://lore.kernel.org/io-uring/20251210085501.84261-1-changfengnan@bytedance.com/ Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r--io_uring/io_uring.c27
1 files changed, 8 insertions, 19 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6cb24cdf8e68..05a660c97316 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -334,7 +334,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->poll_wq);
spin_lock_init(&ctx->completion_lock);
raw_spin_lock_init(&ctx->timeout_lock);
- INIT_WQ_LIST(&ctx->iopoll_list);
+ INIT_LIST_HEAD(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
INIT_LIST_HEAD(&ctx->ltimeout_list);
@@ -1561,7 +1561,7 @@ __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
return;
mutex_lock(&ctx->uring_lock);
- while (!wq_list_empty(&ctx->iopoll_list)) {
+ while (!list_empty(&ctx->iopoll_list)) {
/* let it sleep and repeat later if can't complete a request */
if (io_do_iopoll(ctx, true) == 0)
break;
@@ -1626,21 +1626,18 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
* forever, while the workqueue is stuck trying to acquire the
* very same mutex.
*/
- if (wq_list_empty(&ctx->iopoll_list) ||
- io_task_work_pending(ctx)) {
+ if (list_empty(&ctx->iopoll_list) || io_task_work_pending(ctx)) {
u32 tail = ctx->cached_cq_tail;
(void) io_run_local_work_locked(ctx, min_events);
- if (task_work_pending(current) ||
- wq_list_empty(&ctx->iopoll_list)) {
+ if (task_work_pending(current) || list_empty(&ctx->iopoll_list)) {
mutex_unlock(&ctx->uring_lock);
io_run_task_work();
mutex_lock(&ctx->uring_lock);
}
/* some requests don't go through iopoll_list */
- if (tail != ctx->cached_cq_tail ||
- wq_list_empty(&ctx->iopoll_list))
+ if (tail != ctx->cached_cq_tail || list_empty(&ctx->iopoll_list))
break;
}
ret = io_do_iopoll(ctx, !min_events);
@@ -1683,25 +1680,17 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
* how we do polling eventually, not spinning if we're on potentially
* different devices.
*/
- if (wq_list_empty(&ctx->iopoll_list)) {
+ if (list_empty(&ctx->iopoll_list)) {
ctx->poll_multi_queue = false;
} else if (!ctx->poll_multi_queue) {
struct io_kiocb *list_req;
- list_req = container_of(ctx->iopoll_list.first, struct io_kiocb,
- comp_list);
+ list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, iopoll_node);
if (list_req->file != req->file)
ctx->poll_multi_queue = true;
}
- /*
- * For fast devices, IO may have already completed. If it has, add
- * it to the front so we find it first.
- */
- if (READ_ONCE(req->iopoll_completed))
- wq_list_add_head(&req->comp_list, &ctx->iopoll_list);
- else
- wq_list_add_tail(&req->comp_list, &ctx->iopoll_list);
+ list_add_tail(&req->iopoll_node, &ctx->iopoll_list);
if (unlikely(needs_lock)) {
/*