diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-30 00:12:52 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-07-30 00:12:52 +0300 |
| commit | 02dc9d15d7784afb42ffde0ae3d8156dd09c2ff7 (patch) | |
| tree | 9a20f399e63ccf69c0d2c89fcb65b19c6d5a3052 /include | |
| parent | d614399b281abf3980cc9b340a5066e9f4020b5d (diff) | |
| parent | cd3557a7618bf5c1935e9f66b58a329f1f1f4b27 (diff) | |
| download | linux-02dc9d15d7784afb42ffde0ae3d8156dd09c2ff7.tar.xz | |
Merge tag 'timers-ptp-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timekeeping and VDSO updates from Thomas Gleixner:
- Introduce support for auxiliary timekeepers
PTP clocks can be disconnected from the universal CLOCK_TAI reality
for various reasons including regulatory requirements for
functional safety redundancy.
The kernel so far only supports a single notion of time, which means
that all clocks are correlated in frequency and only differ by offset
to each other.
Access to non-correlated PTP clocks has been available so far only
through the file descriptor based "POSIX clock IDs", which are
subject to locking and have to go all the way out to the hardware.
The access is not only horribly slow, as it has to go all the way out
to the NIC/PTP hardware, but it also prevents the kernel from reading
the time of such clocks e.g. from the network stack, where it is
required for TSN networking both on the transmit and receive side
unless the hardware provides offloading.
The auxiliary clocks provide a mechanism to support arbitrary clocks
which are not correlated to the system clock. This is not restricted
to the PTP use case on purpose as there is no kernel side association
of these clocks to a particular PTP device because that's a pure user
space configuration decision. Having them independent allows to
utilize them for other purposes and also enables them to be tested
without hardware dependencies.
To avoid pointless overhead these clocks have to be enabled
individually via a new sysfs interface to reduce the overhead to a
single compare in the hotpath if they are enabled at the Kconfig
level at all.
These clocks utilize the existing timekeeping/NTP infrastructures,
which has been made possible over the recent releases by incrementally
converting these infrastructures over from a single static instance
to a multi-instance pointer based implementation without any
performance regression reported.
The auxiliary clocks provide the same "emulation" of a "correct"
clock as the existing CLOCK_* variants do with an independent
instance of data and provide the same steering mechanism through the
existing sys_clock_adjtime() interface, which has been confirmed to
work by the chronyd(8) maintainer.
That allows to provide lockless kernel internal and VDSO support so
that applications and kernel internal functionalities can access
these clocks without restrictions and at the same performance as the
existing system clocks.
- Avoid double notifications in the adjtimex() syscall. Not a big
issue, but a trivial-to-avoid latency source.
* tag 'timers-ptp-2025-07-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
vdso/gettimeofday: Add support for auxiliary clocks
vdso/vsyscall: Update auxiliary clock data in the datapage
vdso: Introduce aux_clock_resolution_ns()
vdso/gettimeofday: Introduce vdso_get_timestamp()
vdso/gettimeofday: Introduce vdso_set_timespec()
vdso/gettimeofday: Introduce vdso_clockid_valid()
vdso/gettimeofday: Return bool from clock_gettime() helpers
vdso/gettimeofday: Return bool from clock_getres() helpers
vdso/helpers: Add helpers for seqlocks of single vdso_clock
vdso/vsyscall: Split up __arch_update_vsyscall() into __arch_update_vdso_clock()
vdso/vsyscall: Introduce a helper to fill clock configurations
timekeeping: Remove the temporary CLOCK_AUX workaround
timekeeping: Provide ktime_get_clock_ts64()
timekeeping: Provide interface to control auxiliary clocks
timekeeping: Provide update for auxiliary timekeepers
timekeeping: Provide adjtimex() for auxiliary clocks
timekeeping: Prepare do_adtimex() for auxiliary clocks
timekeeping: Make do_adjtimex() reusable
timekeeping: Add auxiliary clock support to __timekeeping_inject_offset()
timekeeping: Make timekeeping_inject_offset() reusable
...
Diffstat (limited to 'include')
| -rw-r--r-- | include/asm-generic/vdso/vsyscall.h | 6 | ||||
| -rw-r--r-- | include/linux/posix-timers.h | 5 | ||||
| -rw-r--r-- | include/linux/timekeeper_internal.h | 37 | ||||
| -rw-r--r-- | include/linux/timekeeping.h | 12 | ||||
| -rw-r--r-- | include/uapi/linux/time.h | 11 | ||||
| -rw-r--r-- | include/vdso/auxclock.h | 13 | ||||
| -rw-r--r-- | include/vdso/datapage.h | 5 | ||||
| -rw-r--r-- | include/vdso/helpers.h | 50 |
8 files changed, 122 insertions, 17 deletions
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index b550afa15ecd..7fc0b560007d 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -22,11 +22,11 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ -#ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata) +#ifndef __arch_update_vdso_clock +static __always_inline void __arch_update_vdso_clock(struct vdso_clock *vc) { } -#endif /* __arch_update_vsyscall */ +#endif /* __arch_update_vdso_clock */ #ifndef __arch_sync_vdso_time_data static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dd48c64b605e..4d3dbcef379e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -37,6 +37,11 @@ static inline int clockid_to_fd(const clockid_t clk) return ~(clk >> 3); } +static inline bool clockid_aux_valid(clockid_t id) +{ + return IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) && id >= CLOCK_AUX && id <= CLOCK_AUX_LAST; +} + #ifdef CONFIG_POSIX_TIMERS #include <linux/signal_types.h> diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 785048a3b3e6..c27aac67cb3f 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -12,6 +12,22 @@ #include <linux/time.h> /** + * timekeeper_ids - IDs for various time keepers in the kernel + * @TIMEKEEPER_CORE: The central core timekeeper managing system time + * @TIMEKEEPER_AUX_FIRST: The first AUX timekeeper + * @TIMEKEEPER_AUX_LAST: The last AUX timekeeper + * @TIMEKEEPERS_MAX: The maximum number of timekeepers managed + */ +enum timekeeper_ids { + TIMEKEEPER_CORE, +#ifdef CONFIG_POSIX_AUX_CLOCKS + TIMEKEEPER_AUX_FIRST, + TIMEKEEPER_AUX_LAST = TIMEKEEPER_AUX_FIRST + MAX_AUX_CLOCKS - 1, 
+#endif + TIMEKEEPERS_MAX, +}; + +/** * struct tk_read_base - base structure for timekeeping readout * @clock: Current clocksource used for timekeeping. * @mask: Bitmask for two's complement subtraction of non 64bit clocks @@ -51,11 +67,14 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai + * @offs_aux: Offset clock monotonic -> clock AUX * @coarse_nsec: The nanoseconds part for coarse time getters + * @id: The timekeeper ID * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events + * @clock_valid: Indicator for valid clock * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -95,13 +114,16 @@ struct tk_read_base { * @monotonic_to_boottime is a timespec64 representation of @offs_boot to * accelerate the VDSO update for CLOCK_BOOTTIME. * + * @offs_aux is used by the auxiliary timekeepers which do not utilize any + * of the regular timekeeper offset fields. + * * The cacheline ordering of the structure is optimized for in kernel usage of * the ktime_get() and ktime_get_ts64() family of time accessors. Struct * timekeeper is prepended in the core timekeeping code with a sequence count, * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... coarse_nsec + * 1: xtime_sec ... 
id * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,8 +143,12 @@ struct timekeeper { struct timespec64 wall_to_monotonic; ktime_t offs_real; ktime_t offs_boot; - ktime_t offs_tai; + union { + ktime_t offs_tai; + ktime_t offs_aux; + }; u32 coarse_nsec; + enum timekeeper_ids id; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -131,6 +157,7 @@ struct timekeeper { /* Cachline 3 and 4 (timekeeping internal variables): */ unsigned int clock_was_set_seq; u8 cs_was_changed_seq; + u8 clock_valid; struct timespec64 monotonic_to_boot; @@ -163,4 +190,10 @@ static inline void update_vsyscall_tz(void) } #endif +#if defined(CONFIG_GENERIC_GETTIMEOFDAY) && defined(CONFIG_POSIX_AUX_CLOCKS) +extern void vdso_time_update_aux(struct timekeeper *tk); +#else +static inline void vdso_time_update_aux(struct timekeeper *tk) { } +#endif + #endif /* _LINUX_TIMEKEEPER_INTERNAL_H */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 542773650200..aee2c1a46e47 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -44,6 +44,7 @@ extern void ktime_get_ts64(struct timespec64 *ts); extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +extern void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); @@ -263,6 +264,17 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); +/* + * Auxiliary clock interfaces + */ +#ifdef CONFIG_POSIX_AUX_CLOCKS +extern bool ktime_get_aux(clockid_t id, ktime_t *kt); +extern bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt); +#else +static inline bool ktime_get_aux(clockid_t id, ktime_t *kt) { return false; } +static inline bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *kt) { return false; } 
+#endif + /** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index 4f4b6e48e01c..16ca1ac206fd 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -64,6 +64,17 @@ struct timezone { #define CLOCK_TAI 11 #define MAX_CLOCKS 16 + +/* + * AUX clock support. AUXiliary clocks are dynamically configured by + * enabling a clock ID. These clock can be steered independently of the + * core timekeeper. The kernel can support up to 8 auxiliary clocks, but + * the actual limit depends on eventual architecture constraints vs. VDSO. + */ +#define CLOCK_AUX MAX_CLOCKS +#define MAX_AUX_CLOCKS 8 +#define CLOCK_AUX_LAST (CLOCK_AUX + MAX_AUX_CLOCKS - 1) + #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/vdso/auxclock.h b/include/vdso/auxclock.h new file mode 100644 index 000000000000..6d6e74cbc400 --- /dev/null +++ b/include/vdso/auxclock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VDSO_AUXCLOCK_H +#define _VDSO_AUXCLOCK_H + +#include <uapi/linux/time.h> +#include <uapi/linux/types.h> + +static __always_inline u64 aux_clock_resolution_ns(void) +{ + return 1; +} + +#endif /* _VDSO_AUXCLOCK_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 1864e76e8f69..02533038640e 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include <linux/compiler.h> +#include <uapi/linux/bits.h> #include <uapi/linux/time.h> #include <uapi/linux/types.h> #include <uapi/asm-generic/errno-base.h> @@ -38,6 +39,7 @@ struct vdso_arch_data { #endif #define VDSO_BASES (CLOCK_TAI + 1) +#define VDSO_BASE_AUX 0 #define VDSO_HRES (BIT(CLOCK_REALTIME) | \ BIT(CLOCK_MONOTONIC) | \ BIT(CLOCK_BOOTTIME) | \ @@ -45,6 +47,7 @@ struct vdso_arch_data { #define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \ BIT(CLOCK_MONOTONIC_COARSE)) #define 
VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW)) +#define VDSO_AUX __GENMASK(CLOCK_AUX_LAST, CLOCK_AUX) #define CS_HRES_COARSE 0 #define CS_RAW 1 @@ -117,6 +120,7 @@ struct vdso_clock { * @arch_data: architecture specific data (optional, defaults * to an empty struct) * @clock_data: clocksource related data (array) + * @aux_clock_data: auxiliary clocksource related data (array) * @tz_minuteswest: minutes west of Greenwich * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution @@ -133,6 +137,7 @@ struct vdso_time_data { struct arch_vdso_time_data arch_data; struct vdso_clock clock_data[CS_BASES]; + struct vdso_clock aux_clock_data[MAX_AUX_CLOCKS]; s32 tz_minuteswest; s32 tz_dsttime; diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 0a98fed550ba..1a5ee9d9052c 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -28,17 +28,47 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, return seq != start; } -static __always_inline void vdso_write_begin(struct vdso_time_data *vd) +static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc) { - struct vdso_clock *vc = vd->clock_data; + /* + * WRITE_ONCE() is required otherwise the compiler can validly tear + * updates to vc->seq and it is possible that the value seen by the + * reader is inconsistent. + */ + WRITE_ONCE(vc->seq, vc->seq + 1); +} +static __always_inline void vdso_write_seq_end(struct vdso_clock *vc) +{ /* * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the + * updates to vc->seq and it is possible that the value seen by the * reader is inconsistent. 
*/ - WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); + WRITE_ONCE(vc->seq, vc->seq + 1); +} + +static __always_inline void vdso_write_begin_clock(struct vdso_clock *vc) +{ + vdso_write_seq_begin(vc); + /* Ensure the sequence invalidation is visible before data is modified */ + smp_wmb(); +} + +static __always_inline void vdso_write_end_clock(struct vdso_clock *vc) +{ + /* Ensure the data update is visible before the sequence is set valid again */ + smp_wmb(); + vdso_write_seq_end(vc); +} + +static __always_inline void vdso_write_begin(struct vdso_time_data *vd) +{ + struct vdso_clock *vc = vd->clock_data; + + vdso_write_seq_begin(&vc[CS_HRES_COARSE]); + vdso_write_seq_begin(&vc[CS_RAW]); + /* Ensure the sequence invalidation is visible before data is modified */ smp_wmb(); } @@ -46,14 +76,10 @@ static __always_inline void vdso_write_end(struct vdso_time_data *vd) { struct vdso_clock *vc = vd->clock_data; + /* Ensure the data update is visible before the sequence is set valid again */ smp_wmb(); - /* - * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the - * reader is inconsistent. - */ - WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); + vdso_write_seq_end(&vc[CS_HRES_COARSE]); + vdso_write_seq_end(&vc[CS_RAW]); } #endif /* !__ASSEMBLY__ */ |
