From a46435537a844d0f7b4b620baf962cad136422de Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 24 Feb 2026 11:36:09 -0700
Subject: io_uring/cmd_net: use READ_ONCE() for ->addr3 read

Any SQE read should use READ_ONCE(), to ensure the result is read once
and only once. Doesn't really matter for this case, but it's better to
keep these 100% consistent and always use READ_ONCE() for the prep side
of SQE handling.

Fixes: 5d24321e4c15 ("io_uring: Introduce getsockname io_uring cmd")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/cmd_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'io_uring')

diff --git a/io_uring/cmd_net.c b/io_uring/cmd_net.c
index 57ddaf874611..125a81c520a6 100644
--- a/io_uring/cmd_net.c
+++ b/io_uring/cmd_net.c
@@ -146,7 +146,7 @@ static int io_uring_cmd_getsockname(struct socket *sock,
 		return -EINVAL;
 
 	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
-	ulen = u64_to_user_ptr(sqe->addr3);
+	ulen = u64_to_user_ptr(READ_ONCE(sqe->addr3));
 	peer = READ_ONCE(sqe->optlen);
 	if (peer > 1)
 		return -EINVAL;
-- 
cgit v1.2.3


From 85f6c439a69afe4fa8a688512e586971e97e273a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 25 Feb 2026 10:35:57 +0000
Subject: io_uring/timeout: READ_ONCE sqe->addr

We should use READ_ONCE when reading from an SQE, to make sure timeout
gets a stable timespec address.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/timeout.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'io_uring')

diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 84dda24f3eb2..cb61d4862fc6 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -462,7 +462,7 @@ int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 			tr->ltimeout = true;
 		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
 			return -EINVAL;
-		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
+		if (get_timespec64(&tr->ts, u64_to_user_ptr(READ_ONCE(sqe->addr2))))
 			return -EFAULT;
 		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
 			return -EINVAL;
@@ -557,7 +557,7 @@ static int __io_timeout_prep(struct io_kiocb *req,
 	data->req = req;
 	data->flags = flags;
 
-	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
+	if (get_timespec64(&data->ts, u64_to_user_ptr(READ_ONCE(sqe->addr))))
 		return -EFAULT;
 
 	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
-- 
cgit v1.2.3


From 3d17d76d1ffb139a7492317b196ee03c8eabc9dc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 27 Feb 2026 09:07:45 -0800
Subject: io_uring/zcrx: don't set rx_page_size when not requested

The rx_buf_len parameter was recently added to the Rx zero-copy
implementation. The expectation is that, when it is not set, the system
will maintain previous behavior and use the default buffer size
(PAGE_SIZE).

This works correctly at the io_uring level, but we don't preserve
the same "zero means default" semantics when registering the memory
provider on the netdev. mp_param.rx_page_size is unconditionally
set to PAGE_SIZE. This causes __net_mp_open_rxq() to check for
QCFG_RX_PAGE_SIZE support in the driver, and return -EOPNOTSUPP
for drivers that don't advertise it -- even though the user never
asked for large buffers.

Only set mp_param.rx_page_size when rx_buf_len was explicitly provided,
so that the default page size path works on all zcrx-capable drivers.
mlx5 and fbnic only support 4kB pages in the current release.

Fixes: 795663b4d160 ("io_uring/zcrx: implement large rx buffer support")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/zcrx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'io_uring')

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 97984a73a95d..19b287d21f4b 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -837,7 +837,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	if (ret)
 		goto netdev_put_unlock;
 
-	mp_param.rx_page_size = 1U << ifq->niov_shift;
+	if (reg.rx_buf_len)
+		mp_param.rx_page_size = 1U << ifq->niov_shift;
 	mp_param.mp_ops = &io_uring_pp_zc_ops;
 	mp_param.mp_priv = ifq;
 	ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
-- 
cgit v1.2.3


From c36e28becd0586ac98318fd335e5e91d19cd2623 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 2 Mar 2026 14:32:04 +0000
Subject: io_uring/net: reject SEND_VECTORIZED when unsupported

IORING_SEND_VECTORIZED with registered buffers is not implemented but
could be. Don't silently ignore the flag in this case but reject it with
an error. It only affects sendzc as normal sends don't support
registered buffers.

Fixes: 6f02527729bd3 ("io_uring/net: Allow to do vectorized send")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'io_uring')

diff --git a/io_uring/net.c b/io_uring/net.c
index 8576c6cb2236..d27adbe3f20b 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -375,6 +375,8 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		kmsg->msg.msg_namelen = addr_len;
 	}
 	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
+		if (sr->flags & IORING_SEND_VECTORIZED)
+			return -EINVAL;
 		req->flags |= REQ_F_IMPORT_BUFFER;
 		return 0;
 	}
-- 
cgit v1.2.3


From 531bb98a030cc1073bd7ed9a502c0a3a781e92ee Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 4 Mar 2026 12:37:43 +0000
Subject: io_uring/zcrx: use READ_ONCE with user shared RQEs

Refill queue entries are shared with the user space, use READ_ONCE when
reading them.

Fixes: 34a3e60821ab9 ("io_uring/zcrx: implement zerocopy receive pp memory provider")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/zcrx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'io_uring')

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 19b287d21f4b..0461edebb042 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -927,11 +927,12 @@ static inline bool io_parse_rqe(struct io_uring_zcrx_rqe *rqe,
 				struct io_zcrx_ifq *ifq,
 				struct net_iov **ret_niov)
 {
+	__u64 off = READ_ONCE(rqe->off);
 	unsigned niov_idx, area_idx;
 	struct io_zcrx_area *area;
 
-	area_idx = rqe->off >> IORING_ZCRX_AREA_SHIFT;
-	niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift;
+	area_idx = off >> IORING_ZCRX_AREA_SHIFT;
+	niov_idx = (off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift;
 
 	if (unlikely(rqe->__pad || area_idx))
 		return false;
-- 
cgit v1.2.3


From 3306a589e598b50a5bbdfe837371670b507043c0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 9 Mar 2026 15:34:41 +0100
Subject: io_uring/register: fix comment about task_no_new_privs

The actual code is right, but the comment is the wrong way around.

Fixes: ed82f35b926b ("io_uring: allow registration of per-task restrictions")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/register.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'io_uring')

diff --git a/io_uring/register.c b/io_uring/register.c
index 594b1f2ce875..a839b22fd392 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -202,7 +202,7 @@ static int io_register_restrictions_task(void __user *arg, unsigned int nr_args)
 		return -EPERM;
 	/*
 	 * Similar to seccomp, disallow setting a filter if task_no_new_privs
-	 * is true and we're not CAP_SYS_ADMIN.
+	 * is false and we're not CAP_SYS_ADMIN.
 	 */
 	if (!task_no_new_privs(current) &&
 	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
@@ -238,7 +238,7 @@ static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args)
 
 	/*
 	 * Similar to seccomp, disallow setting a filter if task_no_new_privs
-	 * is true and we're not CAP_SYS_ADMIN.
+	 * is false and we're not CAP_SYS_ADMIN.
 	 */
 	if (!task_no_new_privs(current) &&
 	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
-- 
cgit v1.2.3


From 785d4625d3e05bb0ac536ff4fd74d096cfe51714 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 9 Mar 2026 14:20:14 -0600
Subject: io_uring/bpf_filter: use bpf_prog_run_pin_on_cpu() to prevent
 migration

Since the caller, __io_uring_run_bpf_filters(), doesn't prevent
migration, it should use the migration disabling variant for running
the BPF program.

Fixes: d42eb05e60fe ("io_uring: add support for BPF filtering for opcode restrictions")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/bpf_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'io_uring')

diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c
index 28a23e92ee81..9cc44764e0ac 100644
--- a/io_uring/bpf_filter.c
+++ b/io_uring/bpf_filter.c
@@ -85,7 +85,7 @@ int __io_uring_run_bpf_filters(struct io_bpf_filter __rcu **filters,
 	do {
 		if (filter == &dummy_filter)
 			return -EACCES;
-		ret = bpf_prog_run(filter->prog, &bpf_ctx);
+		ret = bpf_prog_run_pin_on_cpu(filter->prog, &bpf_ctx);
 		if (!ret)
 			return -EACCES;
 		filter = filter->next;
-- 
cgit v1.2.3


From 96189080265e6bb5dde3a4afbaf947af493e3f82 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 9 Mar 2026 14:21:37 -0600
Subject: io_uring: ensure ctx->rings is stable for task work flags
 manipulation

If DEFER_TASKRUN | SETUP_TASKRUN is used and task work is added while
the ring is being resized, it's possible for the OR'ing of
IORING_SQ_TASKRUN to happen in the small window of swapping into the
new rings and the old rings being freed.

Prevent this by adding a 2nd ->rings pointer, ->rings_rcu, which is
protected by RCU. The task work flags manipulation is inside RCU
already, and if the resize ring freeing is done post an RCU synchronize,
then there's no need to add locking to the fast path of task work
additions.

Note: this is only done for DEFER_TASKRUN, as that's the only setup mode
that supports ring resizing. If this ever changes, then they too need to
use the io_ctx_mark_taskrun() helper.

Link: https://lore.kernel.org/io-uring/20260309062759.482210-1-naup96721@gmail.com/
Cc: stable@vger.kernel.org
Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            |  2 ++
 io_uring/register.c            | 11 +++++++++++
 io_uring/tw.c                  | 22 ++++++++++++++++++++--
 4 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'io_uring')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3e4a82a6f817..dd1420bfcb73 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -388,6 +388,7 @@ struct io_ring_ctx {
 	 * regularly bounce b/w CPUs.
 	 */
 	struct {
+		struct io_rings	__rcu	*rings_rcu;
 		struct llist_head	work_llist;
 		struct llist_head	retry_llist;
 		unsigned long		check_cq;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index ccab8562d273..20fdc442e014 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2066,6 +2066,7 @@ static void io_rings_free(struct io_ring_ctx *ctx)
 	io_free_region(ctx->user, &ctx->sq_region);
 	io_free_region(ctx->user, &ctx->ring_region);
 	ctx->rings = NULL;
+	RCU_INIT_POINTER(ctx->rings_rcu, NULL);
 	ctx->sq_sqes = NULL;
 }
 
@@ -2703,6 +2704,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	if (ret)
 		return ret;
 	ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
+	rcu_assign_pointer(ctx->rings_rcu, rings);
 	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
 		ctx->sq_array = (u32 *)((char *)rings + rl->sq_array_offset);
 
diff --git a/io_uring/register.c b/io_uring/register.c
index a839b22fd392..5f3eb018fb32 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -633,7 +633,15 @@ overflow:
 	ctx->sq_entries = p->sq_entries;
 	ctx->cq_entries = p->cq_entries;
 
+	/*
+	 * Just mark any flag we may have missed and that the application
+	 * should act on unconditionally. Worst case it'll be an extra
+	 * syscall.
+	 */
+	atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags);
 	ctx->rings = n.rings;
+	rcu_assign_pointer(ctx->rings_rcu, n.rings);
+
 	ctx->sq_sqes = n.sq_sqes;
 	swap_old(ctx, o, n, ring_region);
 	swap_old(ctx, o, n, sq_region);
@@ -642,6 +650,9 @@ overflow:
 out:
 	spin_unlock(&ctx->completion_lock);
 	mutex_unlock(&ctx->mmap_lock);
+	/* Wait for concurrent io_ctx_mark_taskrun() */
+	if (to_free == &o)
+		synchronize_rcu_expedited();
 	io_register_free_rings(ctx, to_free);
 
 	if (ctx->sq_data)
diff --git a/io_uring/tw.c b/io_uring/tw.c
index 1ee2b8ab07c8..2f2b4ac4b126 100644
--- a/io_uring/tw.c
+++ b/io_uring/tw.c
@@ -152,6 +152,21 @@ void tctx_task_work(struct callback_head *cb)
 	WARN_ON_ONCE(ret);
 }
 
+/*
+ * Sets IORING_SQ_TASKRUN in the sq_flags shared with userspace, using the
+ * RCU protected rings pointer to be safe against concurrent ring resizing.
+ */
+static void io_ctx_mark_taskrun(struct io_ring_ctx *ctx)
+{
+	lockdep_assert_in_rcu_read_lock();
+
+	if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) {
+		struct io_rings *rings = rcu_dereference(ctx->rings_rcu);
+
+		atomic_or(IORING_SQ_TASKRUN, &rings->sq_flags);
+	}
+}
+
 void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -206,8 +221,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 	 */
 
 	if (!head) {
-		if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
-			atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
+		io_ctx_mark_taskrun(ctx);
 		if (ctx->has_evfd)
 			io_eventfd_signal(ctx, false);
 	}
@@ -231,6 +245,10 @@ void io_req_normal_work_add(struct io_kiocb *req)
 	if (!llist_add(&req->io_task_work.node, &tctx->task_list))
 		return;
 
+	/*
+	 * Doesn't need to use ->rings_rcu, as resizing isn't supported for
+	 * !DEFER_TASKRUN.
+	 */
 	if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
 		atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
 
-- 
cgit v1.2.3


From 177c69432161f6e4bab07ccacf8a1748a6898a6b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 9 Mar 2026 14:35:49 -0600
Subject: io_uring/eventfd: use ctx->rings_rcu for flags checking

Similarly to what commit 96189080265e ("io_uring: ensure ctx->rings is
stable for task work flags manipulation") did for local task work
additions, use ->rings_rcu under RCU rather than dereference ->rings
directly. See that commit for more details.

Cc: stable@vger.kernel.org
Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/eventfd.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'io_uring')

diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 78f8ab7db104..ab789e1ebe91 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -76,11 +76,15 @@ void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
 {
 	bool skip = false;
 	struct io_ev_fd *ev_fd;
-
-	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
-		return;
+	struct io_rings *rings;
 
 	guard(rcu)();
+
+	rings = rcu_dereference(ctx->rings_rcu);
+	if (!rings)
+		return;
+	if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
+		return;
 	ev_fd = rcu_dereference(ctx->io_ev_fd);
 	/*
 	 * Check again if ev_fd exists in case an io_eventfd_unregister call
-- 
cgit v1.2.3


From 6f02c6b196036dbb6defb4647d8707d29b7fe95b Mon Sep 17 00:00:00 2001
From: Tom Ryan <ryan36005@gmail.com>
Date: Mon, 9 Mar 2026 22:20:02 -0700
Subject: io_uring: fix physical SQE bounds check for SQE_MIXED 128-byte ops

When IORING_SETUP_SQE_MIXED is used without IORING_SETUP_NO_SQARRAY,
the boundary check for 128-byte SQE operations in io_init_req()
validated the logical SQ head position rather than the physical SQE
index.

The existing check:

  !(ctx->cached_sq_head & (ctx->sq_entries - 1))

ensures the logical position isn't at the end of the ring, which is
correct for NO_SQARRAY rings where physical == logical. However, when
sq_array is present, an unprivileged user can remap any logical
position to an arbitrary physical index via sq_array. Setting
sq_array[N] = sq_entries - 1 places a 128-byte operation at the last
physical SQE slot, causing the 128-byte memcpy in
io_uring_cmd_sqe_copy() to read 64 bytes past the end of the SQE
array.

Replace the cached_sq_head alignment check with a direct validation
of the physical SQE index, which correctly handles both sq_array and
NO_SQARRAY cases.

Fixes: 1cba30bf9fdd ("io_uring: add support for IORING_SETUP_SQE_MIXED")
Signed-off-by: Tom Ryan <ryan36005@gmail.com>
Link: https://patch.msgid.link/20260310052003.72871-1-ryan36005@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'io_uring')

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 20fdc442e014..20ec8fdafcae 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1745,7 +1745,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		 * well as 2 contiguous entries.
 		 */
 		if (!(ctx->flags & IORING_SETUP_SQE_MIXED) || *left < 2 ||
-		    !(ctx->cached_sq_head & (ctx->sq_entries - 1)))
+		    (unsigned)(sqe - ctx->sq_sqes) >= ctx->sq_entries - 1)
 			return io_init_fail_req(req, -EINVAL);
 		/*
 		 * A 128b operation on a mixed SQ uses two entries, so we have
-- 
cgit v1.2.3


From c2c185be5c85d37215397c8e8781abf0a69bec1f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 12 Mar 2026 08:59:25 -0600
Subject: io_uring/kbuf: check if target buffer list is still legacy on recycle

There's a gap between when the buffer was grabbed and when it
potentially gets recycled, where if the list is empty, someone could've
upgraded it to a ring provided type. This can happen if the request
is forced via io-wq. The legacy recycling is missing checks for whether
the buffer_list still exists and whether it's of the correct type. Add
those checks.

Cc: stable@vger.kernel.org
Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers")
Reported-by: Keenan Dong <keenanat2000@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/kbuf.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'io_uring')

diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index dae5b4ab3819..e7f444953dfb 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -111,9 +111,18 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
 
 	buf = req->kbuf;
 	bl = io_buffer_get_list(ctx, buf->bgid);
-	list_add(&buf->list, &bl->buf_list);
-	bl->nbufs++;
+	/*
+	 * If the buffer list was upgraded to a ring-based one, or removed,
+	 * while the request was in-flight in io-wq, drop it.
+	 */
+	if (bl && !(bl->flags & IOBL_BUF_RING)) {
+		list_add(&buf->list, &bl->buf_list);
+		bl->nbufs++;
+	} else {
+		kfree(buf);
+	}
 	req->flags &= ~REQ_F_BUFFER_SELECTED;
+	req->kbuf = NULL;
 
 	io_ring_submit_unlock(ctx, issue_flags);
 	return true;
-- 
cgit v1.2.3