diff options
author | Pavel Begunkov <asml.silence@gmail.com> | 2024-08-07 17:18:14 +0300 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2024-08-25 17:27:01 +0300 |
commit | 2b8e976b984278edbeab3251d370e76d237699f9 (patch) | |
tree | f91366b1a924921fc52d4fa43c8868dd3f30709d /io_uring | |
parent | d29cb3726f03cdac7889f0109a7cb84f79e168a8 (diff) | |
download | linux-2b8e976b984278edbeab3251d370e76d237699f9.tar.xz |
io_uring: user registered clockid for wait timeouts
Add a new registration opcode IORING_REGISTER_CLOCK, which allows the
user to select which clock id it wants to use with CQ waiting timeouts.
It only allows a subset of all posix clocks and currently supports
CLOCK_MONOTONIC and CLOCK_BOOTTIME.
Suggested-by: Lewis Baker <lewissbaker@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/io_uring.c | 8 | ||||
-rw-r--r-- | io_uring/io_uring.h | 8 | ||||
-rw-r--r-- | io_uring/napi.c | 2 | ||||
-rw-r--r-- | io_uring/register.c | 31 |
4 files changed, 46 insertions, 3 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5282f9887440..20229e72b65c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2377,7 +2377,8 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, ret = 0; if (iowq->timeout == KTIME_MAX) schedule(); - else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS)) + else if (!schedule_hrtimeout_range_clock(&iowq->timeout, 0, + HRTIMER_MODE_ABS, ctx->clockid)) ret = -ETIME; current->in_iowait = 0; return ret; @@ -2422,7 +2423,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags, iowq.timeout = timespec64_to_ktime(ts); if (!(flags & IORING_ENTER_ABS_TIMER)) - iowq.timeout = ktime_add(iowq.timeout, ktime_get()); + iowq.timeout = ktime_add(iowq.timeout, io_get_time(ctx)); } if (sig) { @@ -3424,6 +3425,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, if (!ctx) return -ENOMEM; + ctx->clockid = CLOCK_MONOTONIC; + ctx->clock_offset = 0; + if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) && !(ctx->flags & IORING_SETUP_IOPOLL) && !(ctx->flags & IORING_SETUP_SQPOLL)) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index c2acf6180845..9935819f12b7 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -437,6 +437,14 @@ static inline bool io_file_can_poll(struct io_kiocb *req) return false; } +static inline ktime_t io_get_time(struct io_ring_ctx *ctx) +{ + if (ctx->clockid == CLOCK_MONOTONIC) + return ktime_get(); + + return ktime_get_with_offset(ctx->clock_offset); +} + enum { IO_CHECK_CQ_OVERFLOW_BIT, IO_CHECK_CQ_DROPPED_BIT, diff --git a/io_uring/napi.c b/io_uring/napi.c index d78fcbecdd27..d0cf694d0172 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -283,7 +283,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); if (iowq->timeout != KTIME_MAX) { - ktime_t dt = ktime_sub(iowq->timeout, ktime_get()); + ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx)); iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt); } diff --git a/io_uring/register.c b/io_uring/register.c index e3c20be5a198..57cb85c42526 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -335,6 +335,31 @@ err: return ret; } +static int io_register_clock(struct io_ring_ctx *ctx, + struct io_uring_clock_register __user *arg) +{ + struct io_uring_clock_register reg; + + if (copy_from_user(®, arg, sizeof(reg))) + return -EFAULT; + if (memchr_inv(®.__resv, 0, sizeof(reg.__resv))) + return -EINVAL; + + switch (reg.clockid) { + case CLOCK_MONOTONIC: + ctx->clock_offset = 0; + break; + case CLOCK_BOOTTIME: + ctx->clock_offset = TK_OFFS_BOOT; + break; + default: + return -EINVAL; + } + + ctx->clockid = reg.clockid; + return 0; +} + static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) __releases(ctx->uring_lock) @@ -511,6 +536,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_unregister_napi(ctx, arg); break; + case IORING_REGISTER_CLOCK: + ret = -EINVAL; + if (!arg || nr_args) + break; + ret = io_register_clock(ctx, arg); + break; default: ret = -EINVAL; break; |