diff options
author | Jens Axboe <axboe@kernel.dk> | 2021-06-17 19:19:54 +0300 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2021-06-17 19:25:50 +0300 |
commit | fe76421d1da1dcdb3a2cd8428ac40106bff28bc0 (patch) | |
tree | dc590c0c33f23e65a0e11cceb7be870944d0360a /fs/io_uring.c | |
parent | 0e03496d1967abf1ebb151a24318c07d07f41f7f (diff) | |
download | linux-fe76421d1da1dcdb3a2cd8428ac40106bff28bc0.tar.xz |
io_uring: allow user configurable IO thread CPU affinity
io-wq defaults to per-node masks for IO workers. This works fine by
default, but isn't particularly handy for workloads that prefer more
specific affinities, for either performance or isolation reasons.
This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU
mask that is then applied to IO thread workers, and an
IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the
default of per-node.
Note that no care is given to existing IO threads, they will need to go
through a reschedule before the affinity is correct if they are already
running or sleeping.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index d916eb2cef09..46a25a7cb70a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, return -EINVAL; } +static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, + unsigned len) +{ + struct io_uring_task *tctx = current->io_uring; + cpumask_var_t new_mask; + int ret; + + if (!tctx || !tctx->io_wq) + return -EINVAL; + + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(new_mask); + if (len > cpumask_size()) + len = cpumask_size(); + + if (copy_from_user(new_mask, arg, len)) { + free_cpumask_var(new_mask); + return -EFAULT; + } + + ret = io_wq_cpu_affinity(tctx->io_wq, new_mask); + free_cpumask_var(new_mask); + return ret; +} + +static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx = current->io_uring; + + if (!tctx || !tctx->io_wq) + return -EINVAL; + + return io_wq_cpu_affinity(tctx->io_wq, NULL); +} + static bool io_register_op_must_quiesce(int op) { switch (op) { @@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op) case IORING_REGISTER_FILES_UPDATE2: case IORING_REGISTER_BUFFERS2: case IORING_REGISTER_BUFFERS_UPDATE: + case IORING_REGISTER_IOWQ_AFF: + case IORING_UNREGISTER_IOWQ_AFF: return false; default: return true; @@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ret = io_register_rsrc_update(ctx, arg, nr_args, IORING_RSRC_BUFFER); break; + case IORING_REGISTER_IOWQ_AFF: + ret = -EINVAL; + if (!arg || !nr_args) + break; + ret = io_register_iowq_aff(ctx, arg, nr_args); + break; + case IORING_UNREGISTER_IOWQ_AFF: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_unregister_iowq_aff(ctx); + break; default: ret = -EINVAL; break; |