From e9447dc0b18db40f7c9807f8d0a218f0fa07517f Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 16 Dec 2024 15:46:11 -0500 Subject: io_uring/uring_cmd: Allocate async data through generic helper This abstracts away the cache details and simplifies the code. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20241216204615.759089-6-krisman@suse.de Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index af842e9b4eb9..d6ff803dbbe1 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -16,22 +16,6 @@ #include "rsrc.h" #include "uring_cmd.h" -static struct uring_cache *io_uring_async_get(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - struct uring_cache *cache; - - cache = io_alloc_cache_get(&ctx->uring_cache); - if (cache) { - req->flags |= REQ_F_ASYNC_DATA; - req->async_data = cache; - return cache; - } - if (!io_alloc_async_data(req)) - return req->async_data; - return NULL; -} - static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); @@ -185,8 +169,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct uring_cache *cache; - cache = io_uring_async_get(req); - if (unlikely(!cache)) + cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, NULL); + if (!cache) return -ENOMEM; if (!(req->flags & REQ_F_FORCE_ASYNC)) { -- cgit v1.2.3 From dadf03cfd4eaa09f1d0e8b2521de1e11d3e3bec1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Jan 2025 15:02:23 +0000 Subject: io_uring/cmd: rename struct uring_cache to io_uring_cmd_data In preparation for making this more generically available for ->uring_cmd() usage that needs stable command data, rename it and move it to io_uring/cmd.h instead.
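As an illustration of what "stable command data" buys a ->uring_cmd() implementation, a minimal hypothetical driver handler might look as follows. The mydrv_* names are invented for the sketch; io_uring_sqe_cmd() and io_uring_cmd_done() are the real accessors visible in the hunks of this series, and the complete-then-return -EIOCBQUEUED convention follows existing in-tree drivers:

static int mydrv_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	/* mydrv_cmd/mydrv_execute are hypothetical. ioucmd->sqe must stay
	 * readable if we punt with -EAGAIN and get reissued later; the
	 * sqes[] copy inside the async data is what guarantees that. */
	const struct mydrv_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;		/* reissue from io-wq context */

	io_uring_cmd_done(ioucmd, mydrv_execute(cmd), 0, issue_flags);
	return -EIOCBQUEUED;		/* completion already posted */
}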
Signed-off-by: Jens Axboe Signed-off-by: David Sterba --- include/linux/io_uring/cmd.h | 4 ++++ io_uring/io_uring.c | 2 +- io_uring/opdef.c | 3 ++- io_uring/uring_cmd.c | 10 +++++----- io_uring/uring_cmd.h | 4 ---- 5 files changed, 12 insertions(+), 11 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index c189d36ad55e..24cff2b9b9d4 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -18,6 +18,10 @@ struct io_uring_cmd { u8 pdu[32]; /* available inline for free use */ }; +struct io_uring_cmd_data { + struct io_uring_sqe sqes[2]; +}; + static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) { return sqe->cmd; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index b2736e3491b8..8ae6bf746fcc 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -315,7 +315,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_async_rw)); ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct uring_cache)); + sizeof(struct io_uring_cmd_data)); spin_lock_init(&ctx->msg_lock); ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_kiocb)); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index a2be3bbca5ff..c7746f67cc65 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "io_uring.h" #include "opdef.h" @@ -414,7 +415,7 @@ const struct io_issue_def io_issue_defs[] = { .plug = 1, .iopoll = 1, .iopoll_queue = 1, - .async_size = 2 * sizeof(struct io_uring_sqe), + .async_size = sizeof(struct io_uring_cmd_data), .prep = io_uring_cmd_prep, .issue = io_uring_cmd, }, diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index e2e8485932d6..eefc203a1214 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -16,10 +16,10 @@ #include "rsrc.h" #include "uring_cmd.h" -static struct uring_cache *io_uring_async_get(struct io_kiocb *req) +static struct io_uring_cmd_data *io_uring_async_get(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - struct uring_cache *cache; + struct io_uring_cmd_data *cache; cache = io_alloc_cache_get(&ctx->uring_cache); if (cache) { @@ -35,7 +35,7 @@ static struct uring_cache *io_uring_async_get(struct io_kiocb *req) static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - struct uring_cache *cache = req->async_data; + struct io_uring_cmd_data *cache = req->async_data; if (issue_flags & IO_URING_F_UNLOCKED) return; @@ -183,7 +183,7 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - struct uring_cache *cache; + struct io_uring_cmd_data *cache; cache = io_uring_async_get(req); if (unlikely(!cache)) @@ -256,7 +256,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) ret = file->f_op->uring_cmd(ioucmd, issue_flags); if (ret == -EAGAIN) { - struct uring_cache *cache = req->async_data; + struct io_uring_cmd_data *cache = req->async_data; if (ioucmd->sqe != (void *) cache) memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx)); diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index a361f98664d2..515823ca68b8 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -1,9 +1,5 @@ // 
SPDX-License-Identifier: GPL-2.0 -struct uring_cache { - struct io_uring_sqe sqes[2]; -}; - int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags); int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); -- cgit v1.2.3 From 3347fa658a1baecd61b007787d031b729cd86537 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Jan 2025 15:02:24 +0000 Subject: io_uring/cmd: add per-op data to struct io_uring_cmd_data In case an op handler for ->uring_cmd() needs stable storage for user data, it can allocate io_uring_cmd_data->op_data and use it for the duration of the request. When the request gets cleaned up, uring_cmd will free it automatically. Signed-off-by: Jens Axboe Signed-off-by: David Sterba --- include/linux/io_uring/cmd.h | 1 + io_uring/uring_cmd.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 24cff2b9b9d4..3df6636ec3a3 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -20,6 +20,7 @@ struct io_uring_cmd { struct io_uring_cmd_data { struct io_uring_sqe sqes[2]; + void *op_data; }; static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index eefc203a1214..019d6f49ff20 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -23,12 +23,16 @@ static struct io_uring_cmd_data *io_uring_async_get(struct io_kiocb *req) cache = io_alloc_cache_get(&ctx->uring_cache); if (cache) { + cache->op_data = NULL; req->flags |= REQ_F_ASYNC_DATA; req->async_data = cache; return cache; } - if (!io_alloc_async_data(req)) - return req->async_data; + if (!io_alloc_async_data(req)) { + cache = req->async_data; + cache->op_data = NULL; + return cache; + } return NULL; } @@ -37,6 +41,11 @@ static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_uring_cmd_data *cache = req->async_data; + if (cache->op_data) { + kfree(cache->op_data); + cache->op_data = NULL; + } + if (issue_flags & IO_URING_F_UNLOCKED) return; if (io_alloc_cache_put(&req->ctx->uring_cache, cache)) { -- cgit v1.2.3 From bab4b2cca027fbc4effc0ef60615a35bfee96ad0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Jan 2025 15:40:48 +0000 Subject: io_uring: reuse io_should_terminate_tw() for cmds io_uring_cmd_work() rolled a hard coded version of io_should_terminate_tw() to avoid conflicts, but now it's time to converge them. 
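For reference, the helper being converged on lives in io_uring/io_uring.h; judging from the open-coded test it replaces in the hunk below, it amounts to roughly:

static inline bool io_should_terminate_tw(void)
{
	/* don't run the normal task_work path once the submitting task
	 * is exiting, or when executed from an exiting kernel thread */
	return current->flags & (PF_KTHREAD | PF_EXITING);
}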
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8a88dd6e4ed8e6c00c6552af0c20c9de02e458de.1736955455.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index d6ff803dbbe1..d235043db21e 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -105,7 +105,7 @@ static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts) struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); unsigned int flags = IO_URING_F_COMPLETE_DEFER; - if (current->flags & (PF_EXITING | PF_KTHREAD)) + if (io_should_terminate_tw()) flags |= IO_URING_F_TASK_DEAD; /* task_work executor checks the deffered list completion */ -- cgit v1.2.3 From d58d82bd0efd6c8edd452fc2f6c6dd052ec57cb2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Jan 2025 17:29:31 -0700 Subject: io_uring/uring_cmd: use cached cmd_op in io_uring_cmd_sock() io_uring_cmd_sock() does a normal read of cmd->sqe->cmd_op, where it really should be using a READ_ONCE() as ->sqe may still be pointing to the original SQE. Since the prep side already does this READ_ONCE() and stores it locally, use that value rather than re-read it. Fixes: 8e9fad0e70b7b ("io_uring: Add io_uring command support for sockets") Link: https://lore.kernel.org/r/20250121-uring-sockcmd-fix-v1-1-add742802a29@google.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index fc94c465a985..3993c9339ac7 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -350,7 +350,7 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) if (!prot || !prot->ioctl) return -EOPNOTSUPP; - switch (cmd->sqe->cmd_op) { + switch (cmd->cmd_op) { case SOCKET_URING_OP_SIOCINQ: ret = prot->ioctl(sk, SIOCINQ, &arg); if (ret) -- cgit v1.2.3 From eaf72f7b414f5944585e7dee9c915c7f8f7f6344 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Jan 2025 19:50:31 -0700 Subject: io_uring/uring_cmd: cleanup struct io_uring_cmd_data layout A few spots in uring_cmd assume that the SQEs copied are always at the start of the structure, and hence mix req->async_data and the struct itself. Clean that up and use the proper indices. 
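Put differently, the pre-cleanup code only worked by virtue of an implicit layout invariant, something like:

	/* what mixing req->async_data and the struct silently assumed */
	BUILD_BUG_ON(offsetof(struct io_uring_cmd_data, sqes) != 0);

i.e. (void *)cache and cache->sqes being the same address. Indexing cache->sqes explicitly keeps the copies correct even if the fields are reordered, which a later patch in this series in fact does by moving op_data to the front of the struct.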
Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 3993c9339ac7..6a63ec4b5445 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -192,8 +192,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return 0; } - memcpy(req->async_data, sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = req->async_data; + memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; return 0; } @@ -260,7 +260,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_cmd_data *cache = req->async_data; if (ioucmd->sqe != (void *) cache) - memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx)); + memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { return -EIOCBQUEUED; -- cgit v1.2.3 From fa3595523d72d13508befd28cf2ca642cafc69f7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Jan 2025 20:00:57 -0700 Subject: io_uring: get rid of alloc cache init_once handling init_once is called when an object doesn't come from the cache, and hence needs initial clearing of certain members. While the whole struct could get cleared by memset() in that case, a few of the cache members are large enough that this may cause unnecessary overhead if the caches used aren't large enough to satisfy the workload. For those cases, some churn of kmalloc+kfree is to be expected. Ensure that the 3 users that need clearing put the members they need cleared at the start of the struct, and wrap the rest of the struct in a struct group so the offset is known. While at it, improve the interaction with KASAN such that when/if KASAN writes to members inside the struct that should be retained over caching, it won't trip over itself. For rw and net, the retaining of the iovec over caching is disabled if KASAN is enabled. A helper will free and clear those members in that case. 
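A sketch of the resulting pattern for a hypothetical new cache user (my_async_data and its members are invented; struct_group() is the regular kernel macro from linux/stddef.h):

struct my_async_data {
	void *retained_buf;	/* survives cache put/get; zeroed only on
				 * a fresh kmalloc() (and on cache hits
				 * when KASAN is enabled) */
	int retained_nr;
	struct_group(clear,	/* fully re-initialized on every request */
		struct iov_iter iter;
	);
};

	/* at ring setup: clear everything up to the group on a cache miss */
	ret |= io_alloc_cache_init(&ctx->my_cache, IO_ALLOC_CACHE_MAX,
				   sizeof(struct my_async_data),
				   offsetof(struct my_async_data, clear));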
Signed-off-by: Jens Axboe --- include/linux/io_uring/cmd.h | 2 +- include/linux/io_uring_types.h | 3 ++- io_uring/alloc_cache.h | 43 +++++++++++++++++++++++++++++++++--------- io_uring/futex.c | 4 ++-- io_uring/io_uring.c | 12 +++++++----- io_uring/io_uring.h | 5 ++--- io_uring/net.c | 28 ++++++--------------------- io_uring/net.h | 20 ++++++++++++-------- io_uring/poll.c | 2 +- io_uring/rw.c | 27 ++++++-------------------- io_uring/rw.h | 27 +++++++++++++++----------- io_uring/uring_cmd.c | 11 ++--------- 12 files changed, 91 insertions(+), 93 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index a3ce553413de..abd0c8bd950b 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -19,8 +19,8 @@ struct io_uring_cmd { }; struct io_uring_cmd_data { - struct io_uring_sqe sqes[2]; void *op_data; + struct io_uring_sqe sqes[2]; }; static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 623d8e798a11..3def525a1da3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -222,7 +222,8 @@ struct io_alloc_cache { void **entries; unsigned int nr_cached; unsigned int max_cached; - size_t elem_size; + unsigned int elem_size; + unsigned int init_clear; }; struct io_ring_ctx { diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index a3a8cfec32ce..cca96aff3277 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -6,6 +6,19 @@ */ #define IO_ALLOC_CACHE_MAX 128 +#if defined(CONFIG_KASAN) +static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr) +{ + kfree(*iov); + *iov = NULL; + *nr = 0; +} +#else +static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr) +{ +} +#endif + static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, void *entry) { @@ -23,35 +36,47 @@ static inline void *io_alloc_cache_get(struct io_alloc_cache *cache) if (cache->nr_cached) { void *entry = cache->entries[--cache->nr_cached]; + /* + * If KASAN is enabled, always clear the initial bytes that + * must be zeroed post alloc, in case any of them overlap + * with KASAN storage. 
+ */ +#if defined(CONFIG_KASAN) kasan_mempool_unpoison_object(entry, cache->elem_size); + if (cache->init_clear) + memset(entry, 0, cache->init_clear); +#endif return entry; } return NULL; } -static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp, - void (*init_once)(void *obj)) +static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp) { - if (unlikely(!cache->nr_cached)) { - void *obj = kmalloc(cache->elem_size, gfp); + void *obj; - if (obj && init_once) - init_once(obj); + obj = io_alloc_cache_get(cache); + if (obj) return obj; - } - return io_alloc_cache_get(cache); + + obj = kmalloc(cache->elem_size, gfp); + if (obj && cache->init_clear) + memset(obj, 0, cache->init_clear); + return obj; } /* returns false if the cache was initialized properly */ static inline bool io_alloc_cache_init(struct io_alloc_cache *cache, - unsigned max_nr, size_t size) + unsigned max_nr, unsigned int size, + unsigned int init_bytes) { cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); if (cache->entries) { cache->nr_cached = 0; cache->max_cached = max_nr; cache->elem_size = size; + cache->init_clear = init_bytes; return false; } return true; diff --git a/io_uring/futex.c b/io_uring/futex.c index 30139cc150f2..3159a2b7eeca 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -36,7 +36,7 @@ struct io_futex_data { bool io_futex_cache_init(struct io_ring_ctx *ctx) { return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX, - sizeof(struct io_futex_data)); + sizeof(struct io_futex_data), 0); } void io_futex_cache_free(struct io_ring_ctx *ctx) @@ -320,7 +320,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) } io_ring_submit_lock(ctx, issue_flags); - ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL); + ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT); if (!ifd) { ret = -ENOMEM; goto done_unlock; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7bfbc7c22367..263e504be4a8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -315,16 +315,18 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, - sizeof(struct async_poll)); + sizeof(struct async_poll), 0); ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_msghdr)); + sizeof(struct io_async_msghdr), + offsetof(struct io_async_msghdr, clear)); ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_async_rw)); + sizeof(struct io_async_rw), + offsetof(struct io_async_rw, clear)); ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_uring_cmd_data)); + sizeof(struct io_uring_cmd_data), 0); spin_lock_init(&ctx->msg_lock); ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX, - sizeof(struct io_kiocb)); + sizeof(struct io_kiocb), 0); ret |= io_futex_cache_init(ctx); if (ret) goto free_ref; diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index f65e3f3ede51..67adbb3c1bf5 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -226,10 +226,9 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags) } static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache, - struct io_kiocb *req, - void (*init_once)(void *obj)) + struct io_kiocb *req) { - req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once); + req->async_data 
= io_cache_alloc(cache, GFP_KERNEL); if (req->async_data) req->flags |= REQ_F_ASYNC_DATA; return req->async_data; diff --git a/io_uring/net.c b/io_uring/net.c index 85f55fbc25c9..41eef286f8b9 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -137,7 +137,6 @@ static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr *hdr = req->async_data; - struct iovec *iov; /* can't recycle, ensure we free the iovec if we have one */ if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { @@ -146,39 +145,25 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) } /* Let normal cleanup path reap it if we fail adding to the cache */ - iov = hdr->free_iov; + io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr); if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } } -static void io_msg_async_data_init(void *obj) -{ - struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj; - - hdr->free_iov = NULL; - hdr->free_iov_nr = 0; -} - static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_msghdr *hdr; - hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req, - io_msg_async_data_init); + hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req); if (!hdr) return NULL; /* If the async data was cached, we might have an iov cached inside. */ - if (hdr->free_iov) { - kasan_mempool_unpoison_object(hdr->free_iov, - hdr->free_iov_nr * sizeof(struct iovec)); + if (hdr->free_iov) req->flags |= REQ_F_NEED_CLEANUP; - } return hdr; } @@ -1813,11 +1798,10 @@ void io_netmsg_cache_free(const void *entry) { struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; - if (kmsg->free_iov) { - kasan_mempool_unpoison_object(kmsg->free_iov, - kmsg->free_iov_nr * sizeof(struct iovec)); +#if !defined(CONFIG_KASAN) + if (kmsg->free_iov) io_netmsg_iovec_free(kmsg); - } +#endif kfree(kmsg); } #endif diff --git a/io_uring/net.h b/io_uring/net.h index 52bfee05f06a..b804c2b36e60 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -5,16 +5,20 @@ struct io_async_msghdr { #if defined(CONFIG_NET) - struct iovec fast_iov; - /* points to an allocated iov, if NULL we use fast_iov instead */ struct iovec *free_iov; + /* points to an allocated iov, if NULL we use fast_iov instead */ int free_iov_nr; - int namelen; - __kernel_size_t controllen; - __kernel_size_t payloadlen; - struct sockaddr __user *uaddr; - struct msghdr msg; - struct sockaddr_storage addr; + struct_group(clear, + int namelen; + struct iovec fast_iov; + __kernel_size_t controllen; + __kernel_size_t payloadlen; + struct sockaddr __user *uaddr; + struct msghdr msg; + struct sockaddr_storage addr; + ); +#else + struct_group(clear); #endif }; diff --git a/io_uring/poll.c b/io_uring/poll.c index cc01c40b43d3..356474c66f32 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -650,7 +650,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req, kfree(apoll->double_poll); } else { if (!(issue_flags & IO_URING_F_UNLOCKED)) - apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL); + apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC); else apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); if (!apoll) diff --git a/io_uring/rw.c b/io_uring/rw.c index a9a2733be842..991ecfbea88e 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -158,16 +158,13 @@ static void 
io_rw_iovec_free(struct io_async_rw *rw) static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_rw *rw = req->async_data; - struct iovec *iov; if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { io_rw_iovec_free(rw); return; } - iov = rw->free_iovec; + io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr); if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) { - if (iov) - kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } @@ -208,27 +205,16 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags) } } -static void io_rw_async_data_init(void *obj) -{ - struct io_async_rw *rw = (struct io_async_rw *)obj; - - rw->free_iovec = NULL; - rw->bytes_done = 0; -} - static int io_rw_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_async_rw *rw; - rw = io_uring_alloc_async_data(&ctx->rw_cache, req, io_rw_async_data_init); + rw = io_uring_alloc_async_data(&ctx->rw_cache, req); if (!rw) return -ENOMEM; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); + if (rw->free_iovec) req->flags |= REQ_F_NEED_CLEANUP; - } rw->bytes_done = 0; return 0; } @@ -1323,10 +1309,9 @@ void io_rw_cache_free(const void *entry) { struct io_async_rw *rw = (struct io_async_rw *) entry; - if (rw->free_iovec) { - kasan_mempool_unpoison_object(rw->free_iovec, - rw->free_iov_nr * sizeof(struct iovec)); +#if !defined(CONFIG_KASAN) + if (rw->free_iovec) io_rw_iovec_free(rw); - } +#endif kfree(rw); } diff --git a/io_uring/rw.h b/io_uring/rw.h index 2d7656bd268d..eaa59bd64870 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -9,19 +9,24 @@ struct io_meta_state { struct io_async_rw { size_t bytes_done; - struct iov_iter iter; - struct iov_iter_state iter_state; - struct iovec fast_iov; struct iovec *free_iovec; - int free_iov_nr; - /* wpq is for buffered io, while meta fields are used with direct io */ - union { - struct wait_page_queue wpq; - struct { - struct uio_meta meta; - struct io_meta_state meta_state; + struct_group(clear, + struct iov_iter iter; + struct iov_iter_state iter_state; + struct iovec fast_iov; + int free_iov_nr; + /* + * wpq is for buffered io, while meta fields are used with + * direct io + */ + union { + struct wait_page_queue wpq; + struct { + struct uio_meta meta; + struct io_meta_state meta_state; + }; }; - }; + ); }; int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe); diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 6a63ec4b5445..1f6a82128b47 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -168,23 +168,16 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -static void io_uring_cmd_init_once(void *obj) -{ - struct io_uring_cmd_data *data = obj; - - data->op_data = NULL; -} - static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_uring_cmd_data *cache; - cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req, - io_uring_cmd_init_once); + cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req); if (!cache) return -ENOMEM; + cache->op_data = NULL; if (!(req->flags & REQ_F_FORCE_ASYNC)) { /* defer memcpy until we need it */ -- cgit v1.2.3 From 34cae91215c6f65bed2a124fb9283da6ec0b8dd9 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 12 Feb 2025 13:45:45 
-0700 Subject: io_uring/uring_cmd: don't assume io_uring_cmd_data layout eaf72f7b414f ("io_uring/uring_cmd: cleanup struct io_uring_cmd_data layout") removed most of the places assuming struct io_uring_cmd_data has sqes as its first field. However, the EAGAIN case in io_uring_cmd() still compares ioucmd->sqe to the struct io_uring_cmd_data pointer using a void * cast. Since fa3595523d72 ("io_uring: get rid of alloc cache init_once handling"), sqes is no longer io_uring_cmd_data's first field. As a result, the pointers will always compare unequal and memcpy() may be called with the same source and destination. Replace the incorrect void * cast with the address of the sqes field. Signed-off-by: Caleb Sander Mateos Fixes: eaf72f7b414f ("io_uring/uring_cmd: cleanup struct io_uring_cmd_data layout") Link: https://lore.kernel.org/r/20250212204546.3751645-2-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 1f6a82128b47..cfb22e1de0e7 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -252,7 +252,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) if (ret == -EAGAIN) { struct io_uring_cmd_data *cache = req->async_data; - if (ioucmd->sqe != (void *) cache) + if (ioucmd->sqe != cache->sqes) memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { -- cgit v1.2.3 From e663da62ba8672aaa66843f1af8b20e3bb1a0515 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 12 Feb 2025 13:45:46 -0700 Subject: io_uring/uring_cmd: switch sqe to async_data on EAGAIN 5eff57fa9f3a ("io_uring/uring_cmd: defer SQE copying until it's needed") moved the unconditional memcpy() of the uring_cmd SQE to async_data to 2 cases when the request goes async: - If REQ_F_FORCE_ASYNC is set to force the initial issue to go async - If ->uring_cmd() returns -EAGAIN in the initial non-blocking issue Unlike the REQ_F_FORCE_ASYNC case, in the EAGAIN case, io_uring_cmd() copies the SQE to async_data but neglects to update the io_uring_cmd's sqe field to point to async_data. As a result, sqe still points to the slot in the userspace-mapped SQ. At the end of io_submit_sqes(), the kernel advances the SQ head index, allowing userspace to reuse the slot for a new SQE. If userspace reuses the slot before the io_uring worker reissues the original SQE, the io_uring_cmd's SQE will be corrupted. Introduce a helper io_uring_cmd_cache_sqes() to copy the original SQE to the io_uring_cmd's async_data and point sqe there. Use it for both the REQ_F_FORCE_ASYNC and EAGAIN cases. This ensures the uring_cmd doesn't read from the SQ slot after it has been returned to userspace. 
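The window being closed, as a rough timeline:

/*
 * 1. prep: ioucmd->sqe points into the user-mapped SQ ring
 * 2. ->uring_cmd() returns -EAGAIN: the SQE bytes get memcpy()'d into
 *    async_data, but ioucmd->sqe is left pointing at the ring slot
 * 3. io_submit_sqes() advances the SQ head, so userspace may reuse the
 *    slot for a brand new SQE
 * 4. io-wq reissues the request and ->uring_cmd() reads ioucmd->sqe,
 *    now seeing whatever userspace wrote in step 3
 *
 * io_uring_cmd_cache_sqes() pairs the copy with the
 * ioucmd->sqe = cache->sqes update so step 4 reads the stable copy.
 */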
Signed-off-by: Caleb Sander Mateos Fixes: 5eff57fa9f3a ("io_uring/uring_cmd: defer SQE copying until it's needed") Link: https://lore.kernel.org/r/20250212204546.3751645-3-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index cfb22e1de0e7..bcfca18395c4 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -168,6 +168,15 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); +static void io_uring_cmd_cache_sqes(struct io_kiocb *req) +{ + struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); + struct io_uring_cmd_data *cache = req->async_data; + + memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; +} + static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -179,14 +188,10 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return -ENOMEM; cache->op_data = NULL; - if (!(req->flags & REQ_F_FORCE_ASYNC)) { - /* defer memcpy until we need it */ - ioucmd->sqe = sqe; - return 0; - } - - memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = cache->sqes; + ioucmd->sqe = sqe; + /* defer memcpy until we need it */ + if (unlikely(req->flags & REQ_F_FORCE_ASYNC)) + io_uring_cmd_cache_sqes(req); return 0; } @@ -253,7 +258,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) struct io_uring_cmd_data *cache = req->async_data; if (ioucmd->sqe != cache->sqes) - memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); + io_uring_cmd_cache_sqes(req); return -EAGAIN; } else if (ret == -EIOCBQUEUED) { return -EIOCBQUEUED; -- cgit v1.2.3 From 0edf1283a9d1419a2095b4fcdd95c11ac00a191c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 12 Feb 2025 14:05:11 -0700 Subject: io_uring/uring_cmd: remove dead req_has_async_data() check Any uring_cmd always has async data allocated now, there's no reason to check and clear a cached copy of the SQE. Fixes: d10f19dff56e ("io_uring/uring_cmd: switch to always allocating async data") Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index bcfca18395c4..8af7780407b7 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -54,9 +54,6 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, continue; if (cmd->flags & IORING_URING_CMD_CANCELABLE) { - /* ->sqe isn't available if no async data */ - if (!req_has_async_data(req)) - cmd->sqe = NULL; file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL | IO_URING_F_COMPLETE_DEFER); ret = true; -- cgit v1.2.3 From d6211ebbdaa541af197b50b8dd8f22642ce0b87f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Feb 2025 08:24:23 -0700 Subject: io_uring/uring_cmd: unconditionally copy SQEs at prep time This isn't generally necessary, but conditions have been observed where SQE data is accessed from the original SQE after prep has been done and outside of the initial issue. Opcode prep handlers must ensure that any SQE related data is stable beyond the prep phase, but uring_cmd is a bit special in how it handles the SQE which makes it susceptible to reading stale data. If the application has reused the SQE before the original completes, then that can lead to data corruption. 
Down the line we can relax this again once uring_cmd has been sanitized a bit, and avoid unnecessarily copying the SQE. Fixes: 5eff57fa9f3a ("io_uring/uring_cmd: defer SQE copying until it's needed") Reported-by: Caleb Sander Mateos Reviewed-by: Caleb Sander Mateos Reviewed-by: Li Zetao Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'io_uring/uring_cmd.c') diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 8af7780407b7..e6701b7aa147 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -165,15 +165,6 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2, } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -static void io_uring_cmd_cache_sqes(struct io_kiocb *req) -{ - struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); - struct io_uring_cmd_data *cache = req->async_data; - - memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); - ioucmd->sqe = cache->sqes; -} - static int io_uring_cmd_prep_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -185,10 +176,15 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return -ENOMEM; cache->op_data = NULL; - ioucmd->sqe = sqe; - /* defer memcpy until we need it */ - if (unlikely(req->flags & REQ_F_FORCE_ASYNC)) - io_uring_cmd_cache_sqes(req); + /* + * Unconditionally cache the SQE for now - this is only needed for + * requests that go async, but prep handlers must ensure that any + * sqe data is stable beyond prep. Since uring_cmd is special in + * that it doesn't read in per-op data, play it safe and ensure that + * any SQE data is stable beyond prep. This can later get relaxed. + */ + memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); + ioucmd->sqe = cache->sqes; return 0; } @@ -251,16 +247,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) } ret = file->f_op->uring_cmd(ioucmd, issue_flags); - if (ret == -EAGAIN) { - struct io_uring_cmd_data *cache = req->async_data; - - if (ioucmd->sqe != cache->sqes) - io_uring_cmd_cache_sqes(req); - return -EAGAIN; - } else if (ret == -EIOCBQUEUED) { - return -EIOCBQUEUED; - } - + if (ret == -EAGAIN || ret == -EIOCBQUEUED) + return ret; if (ret < 0) req_set_fail(req); io_req_uring_cleanup(req, issue_flags); -- cgit v1.2.3
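Taken together, the series leaves the prep path looking roughly like this (a reconstruction assembled from the hunks above, not a verbatim tree listing):

static int io_uring_cmd_prep_setup(struct io_kiocb *req,
				   const struct io_uring_sqe *sqe)
{
	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
	struct io_uring_cmd_data *cache;

	cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req);
	if (!cache)
		return -ENOMEM;
	cache->op_data = NULL;

	/* copied up front, so nothing dereferences the user-mapped SQ
	 * slot once prep returns */
	memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx));
	ioucmd->sqe = cache->sqes;
	return 0;
}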