summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaoyi Xie <maoyixie.tju@gmail.com>2026-05-04 18:37:54 +0300
committerJens Axboe <axboe@kernel.dk>2026-05-06 13:58:56 +0300
commit9cc6bac1bebf8310d2950d1411a91479e86d69a1 (patch)
tree70a1d09443ebd04a84a547f1b6342f3620d4f6cb
parent04fe9aeb4f3c0999e6715385664c677469dfd8f4 (diff)
downloadlinux-9cc6bac1bebf8310d2950d1411a91479e86d69a1.tar.xz
io_uring/timeout: honour caller's time namespace for IORING_TIMEOUT_ABS
io_uring's IORING_OP_TIMEOUT and IORING_OP_LINK_TIMEOUT accept a timespec from the caller via io_parse_user_time(). With IORING_TIMEOUT_ABS, the timestamp is an absolute deadline on the selected clock. The clock is CLOCK_MONOTONIC by default. CLOCK_BOOTTIME and CLOCK_REALTIME are also selectable. A submitter inside a CLONE_NEWTIME time namespace observes CLOCK_MONOTONIC and CLOCK_BOOTTIME shifted by the namespace's offsets relative to the host. Every other ABS timer interface in the kernel converts the caller's absolute time to host view via timens_ktime_to_host() before arming an hrtimer: kernel/time/posix-timers.c -- timer_settime(TIMER_ABSTIME) kernel/time/posix-stubs.c -- clock_nanosleep(TIMER_ABSTIME) kernel/time/alarmtimer.c -- alarm_timer_nsleep(TIMER_ABSTIME) fs/timerfd.c -- timerfd_settime(TFD_TIMER_ABSTIME) io_parse_user_time() does not. As a result, an absolute timeout submitted from within a time namespace is interpreted in host view. That is generally a different point in time. It may already be in the past, causing the timer to fire immediately, or far in the future, causing the timer not to fire when expected. Reproducer: in unshare --user --time, with a -10s monotonic offset, submit IORING_OP_TIMEOUT with IORING_TIMEOUT_ABS and deadline = now + 1s. The CQE is delivered after <1ms instead of the expected ~1s. Apply timens_ktime_to_host() to the parsed time when IORING_TIMEOUT_ABS is set. Split the existing clock id resolver in io_timeout_get_clock() into a flags only helper io_flags_to_clock(), so io_parse_user_time() can resolve the clock without a struct io_timeout_data. timens_ktime_to_host() is a no-op for clocks not affected by time namespaces, e.g. CLOCK_REALTIME. It is also a no-op for callers in the initial time namespace. The fast path is unchanged. SQPOLL is also covered. The SQPOLL kernel thread is created via create_io_thread() with CLONE_THREAD and no CLONE_NEW* flag. copy_namespaces() therefore shares the submitter's nsproxy by reference. Inside the SQPOLL kthread, current->nsproxy->time_ns is the submitter's time_ns. timens_ktime_to_host() resolves correctly. Suggested-by: Pavel Begunkov <asml.silence@gmail.com> Suggested-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Maoyi Xie <maoyi.xie@ntu.edu.sg> Link: https://patch.msgid.link/20260504153755.1293932-2-maoyi.xie@ntu.edu.sg Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--io_uring/timeout.c35
1 files changed, 22 insertions, 13 deletions
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 4cfdfc519770..e2595cae2b07 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -3,6 +3,7 @@
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>
+#include <linux/time_namespace.h>
#include <trace/events/io_uring.h>
@@ -35,6 +36,22 @@ struct io_timeout_rem {
bool ltimeout;
};
+static clockid_t io_flags_to_clock(unsigned flags)
+{
+ switch (flags & IORING_TIMEOUT_CLOCK_MASK) {
+ case IORING_TIMEOUT_BOOTTIME:
+ return CLOCK_BOOTTIME;
+ case IORING_TIMEOUT_REALTIME:
+ return CLOCK_REALTIME;
+ default:
+ /* can't happen, vetted at prep time */
+ WARN_ON_ONCE(1);
+ fallthrough;
+ case 0:
+ return CLOCK_MONOTONIC;
+ }
+}
+
static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
{
struct timespec64 ts;
@@ -43,7 +60,7 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
*time = ns_to_ktime(arg);
if (*time < 0)
return -EINVAL;
- return 0;
+ goto out;
}
if (get_timespec64(&ts, u64_to_user_ptr(arg)))
@@ -51,6 +68,9 @@ static int io_parse_user_time(ktime_t *time, u64 arg, unsigned flags)
if (ts.tv_sec < 0 || ts.tv_nsec < 0)
return -EINVAL;
*time = timespec64_to_ktime(ts);
+out:
+ if (flags & IORING_TIMEOUT_ABS)
+ *time = timens_ktime_to_host(io_flags_to_clock(flags), *time);
return 0;
}
@@ -399,18 +419,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
- switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
- case IORING_TIMEOUT_BOOTTIME:
- return CLOCK_BOOTTIME;
- case IORING_TIMEOUT_REALTIME:
- return CLOCK_REALTIME;
- default:
- /* can't happen, vetted at prep time */
- WARN_ON_ONCE(1);
- fallthrough;
- case 0:
- return CLOCK_MONOTONIC;
- }
+ return io_flags_to_clock(data->flags);
}
static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,