summaryrefslogtreecommitdiff
path: root/io_uring/io_uring.c
diff options
context:
space:
mode:
authorPavel Begunkov <asml.silence@gmail.com>2026-01-20 23:47:40 +0300
committerJens Axboe <axboe@kernel.dk>2026-01-23 01:47:23 +0300
commit5247c034a67f5a93cc1faa15e9867eec5b22f38a (patch)
treedafc8859ac3cc7eaa08dfe3641ba1c8818267717 /io_uring/io_uring.c
parent0105b0562a5ed6374f06e5cd4246a3f1311a65a0 (diff)
downloadlinux-5247c034a67f5a93cc1faa15e9867eec5b22f38a.tar.xz
io_uring: introduce non-circular SQ
Outside of SQPOLL, SQ entries are normally consumed by the time the submission syscall returns. For those cases we don't need a circular buffer and the head/tail tracking; instead, the kernel can assume that entries always start from the beginning of the SQ at index 0. This patch introduces a setup flag doing exactly that. It's simpler and helps to keep SQEs hot in cache. The feature is optional and enabled by setting IORING_SETUP_SQ_REWIND. The flag is rejected if passed together with SQPOLL, as it'd require waiting for SQ before each submission. It also requires IORING_SETUP_NO_SQARRAY; working without that flag could be supported, but it's unlikely there will be users, so leave more space for future optimisations. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r--io_uring/io_uring.c29
1 files changed, 22 insertions, 7 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a50459238bee..0f88ec74e55d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1945,12 +1945,16 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
{
struct io_rings *rings = ctx->rings;
- /*
- * Ensure any loads from the SQEs are done at this point,
- * since once we write the new head, the application could
- * write new data to them.
- */
- smp_store_release(&rings->sq.head, ctx->cached_sq_head);
+ if (ctx->flags & IORING_SETUP_SQ_REWIND) {
+ ctx->cached_sq_head = 0;
+ } else {
+ /*
+ * Ensure any loads from the SQEs are done at this point,
+ * since once we write the new head, the application could
+ * write new data to them.
+ */
+ smp_store_release(&rings->sq.head, ctx->cached_sq_head);
+ }
}
/*
@@ -1996,10 +2000,15 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
__must_hold(&ctx->uring_lock)
{
- unsigned int entries = io_sqring_entries(ctx);
+ unsigned int entries;
unsigned int left;
int ret;
+ if (ctx->flags & IORING_SETUP_SQ_REWIND)
+ entries = ctx->sq_entries;
+ else
+ entries = io_sqring_entries(ctx);
+
entries = min(nr, entries);
if (unlikely(!entries))
return 0;
@@ -2728,6 +2737,12 @@ static int io_uring_sanitise_params(struct io_uring_params *p)
if (flags & ~IORING_SETUP_FLAGS)
return -EINVAL;
+ if (flags & IORING_SETUP_SQ_REWIND) {
+ if ((flags & IORING_SETUP_SQPOLL) ||
+ !(flags & IORING_SETUP_NO_SQARRAY))
+ return -EINVAL;
+ }
+
/* There is no way to mmap rings without a real fd */
if ((flags & IORING_SETUP_REGISTERED_FD_ONLY) &&
!(flags & IORING_SETUP_NO_MMAP))