diff options
| author | Pavel Begunkov <asml.silence@gmail.com> | 2026-01-20 23:47:40 +0300 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2026-01-23 01:47:23 +0300 |
| commit | 5247c034a67f5a93cc1faa15e9867eec5b22f38a (patch) | |
| tree | dafc8859ac3cc7eaa08dfe3641ba1c8818267717 /io_uring/io_uring.c | |
| parent | 0105b0562a5ed6374f06e5cd4246a3f1311a65a0 (diff) | |
| download | linux-5247c034a67f5a93cc1faa15e9867eec5b22f38a.tar.xz | |
io_uring: introduce non-circular SQ
Outside of SQPOLL, normally SQ entries are consumed by the time the
submission syscall returns. For those cases we don't need a circular
buffer and the head/tail tracking, instead the kernel can assume that
entries always start from the beginning of the SQ at index 0. This patch
introduces a setup flag doing exactly that. It's simpler and helps
to keep SQEs hot in cache.
The feature is optional and enabled by setting IORING_SETUP_SQ_REWIND.
The flag is rejected if passed together with SQPOLL, as that would require
waiting for the SQ before each submission. It also requires
IORING_SETUP_NO_SQARRAY; the SQ array could be supported, but it's unlikely
there will be users, so leave more space for future optimisations.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/io_uring.c')
| -rw-r--r-- | io_uring/io_uring.c | 29 |
1 files changed, 22 insertions, 7 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index a50459238bee..0f88ec74e55d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1945,12 +1945,16 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) { struct io_rings *rings = ctx->rings; - /* - * Ensure any loads from the SQEs are done at this point, - * since once we write the new head, the application could - * write new data to them. - */ - smp_store_release(&rings->sq.head, ctx->cached_sq_head); + if (ctx->flags & IORING_SETUP_SQ_REWIND) { + ctx->cached_sq_head = 0; + } else { + /* + * Ensure any loads from the SQEs are done at this point, + * since once we write the new head, the application could + * write new data to them. + */ + smp_store_release(&rings->sq.head, ctx->cached_sq_head); + } } /* @@ -1996,10 +2000,15 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe) int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) __must_hold(&ctx->uring_lock) { - unsigned int entries = io_sqring_entries(ctx); + unsigned int entries; unsigned int left; int ret; + if (ctx->flags & IORING_SETUP_SQ_REWIND) + entries = ctx->sq_entries; + else + entries = io_sqring_entries(ctx); + entries = min(nr, entries); if (unlikely(!entries)) return 0; @@ -2728,6 +2737,12 @@ static int io_uring_sanitise_params(struct io_uring_params *p) if (flags & ~IORING_SETUP_FLAGS) return -EINVAL; + if (flags & IORING_SETUP_SQ_REWIND) { + if ((flags & IORING_SETUP_SQPOLL) || + !(flags & IORING_SETUP_NO_SQARRAY)) + return -EINVAL; + } + /* There is no way to mmap rings without a real fd */ if ((flags & IORING_SETUP_REGISTERED_FD_ONLY) && !(flags & IORING_SETUP_NO_MMAP)) |
