From d67f34c19a679436dd2963b588015e119279e7a8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:34 +0200 Subject: clocksource: Provide clocksource_arch_init() Architectures have extra archdata in the clocksource, e.g. for VDSO support. There are no sanity checks or general initializations for this available. Add support for that. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Acked-by: John Stultz Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: Florian Weimer Cc: "K. Y. Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130706.973042587@linutronix.de --- include/linux/clocksource.h | 5 +++++ kernel/time/Kconfig | 4 ++++ kernel/time/clocksource.c | 2 ++ 3 files changed, 11 insertions(+) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 308918928767..6e6b86f9046d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -241,6 +241,11 @@ static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz __clocksource_update_freq_scale(cs, 1000, khz); } +#ifdef CONFIG_ARCH_CLOCKSOURCE_INIT +extern void clocksource_arch_init(struct clocksource *cs); +#else +static inline void clocksource_arch_init(struct clocksource *cs) { } +#endif extern int timekeeping_notify(struct clocksource *clock); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 78eabc41eaa6..58b981f4bb5d 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -12,6 +12,10 @@ config CLOCKSOURCE_WATCHDOG config ARCH_CLOCKSOURCE_DATA bool +# Architecture has extra clocksource init called from registration +config ARCH_CLOCKSOURCE_INIT + bool + # Clocksources require validation of the clocksource against the last # cycle update - x86/TSC misfeature config CLOCKSOURCE_VALIDATE_LAST_CYCLE diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 0e6e97a01942..ffe081623aec 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -937,6 +937,8 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) { unsigned long flags; + clocksource_arch_init(cs); + /* Initialize mult/shift and max_idle_ns */ __clocksource_update_freq_scale(cs, scale, freq); -- cgit v1.2.3 From 2a21ad571b62440a4ad66d70b4b5d09c00029af4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:35 +0200 Subject: x86/time: Implement clocksource_arch_init() Runtime validate the VCLOCK_MODE in clocksource::archdata and disable VCLOCK if invalid, which disables the VDSO but keeps the system running. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. 
Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.069167446@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/kernel/time.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1a0be022f91d..adc8c96df914 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -48,6 +48,7 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ANON_INODES select ARCH_CLOCKSOURCE_DATA + select ARCH_CLOCKSOURCE_INIT select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be01328eb755..1fa632e0829f 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -105,3 +106,18 @@ void __init time_init(void) { late_time_init = x86_late_time_init; } + +/* + * Sanity check the vdso related archdata content. + */ +void clocksource_arch_init(struct clocksource *cs) +{ + if (cs->archdata.vclock_mode == VCLOCK_NONE) + return; + + if (cs->archdata.vclock_mode > VCLOCK_MAX) { + pr_warn("clocksource %s registered with invalid vclock_mode %d. Disabling vclock.\n", + cs->name, cs->archdata.vclock_mode); + cs->archdata.vclock_mode = VCLOCK_NONE; + } +} -- cgit v1.2.3 From a51e996d48ac9fa0a1260a3822a14f3d570d3be7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:36 +0200 Subject: x86/vdso: Enforce 64bit clocksource All VDSO clock sources are TSC based and use CLOCKSOURCE_MASK(64). There is no point in masking with all FF. Get rid of it and enforce the mask in the sanity checker. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.151963007@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 2 +- arch/x86/kernel/time.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index e48ca3afa091..6a950854034f 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -201,7 +201,7 @@ notrace static inline u64 vgetsns(int *mode) #endif else return 0; - v = (cycles - gtod->cycle_last) & gtod->mask; + v = cycles - gtod->cycle_last; return v * gtod->mult; } diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 1fa632e0829f..b23f5420b26a 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -120,4 +120,10 @@ void clocksource_arch_init(struct clocksource *cs) cs->name, cs->archdata.vclock_mode); cs->archdata.vclock_mode = VCLOCK_NONE; } + + if (cs->mask != CLOCKSOURCE_MASK(64)) { + pr_warn("clocksource %s registered with invalid mask %016llx. 
Disabling vclock.\n", + cs->name, cs->mask); + cs->archdata.vclock_mode = VCLOCK_NONE; + } } -- cgit v1.2.3 From 77e9c678c54f2d9214796c1c5bd0c7c7ccedd932 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:37 +0200 Subject: x86/vdso: Use unsigned int consistently for vsyscall_gtod_data:: Seq The sequence count in vgtod_data is unsigned int, but the call sites use unsigned long, which is a pointless exercise. Fix the call sites and replace 'unsigned' with unsinged 'int' while at it. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.236250416@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 8 ++++---- arch/x86/include/asm/vgtod.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 6a950854034f..b50ee064beff 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -208,7 +208,7 @@ notrace static inline u64 vgetsns(int *mode) /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ notrace static int __always_inline do_realtime(struct timespec *ts) { - unsigned long seq; + unsigned int seq; u64 ns; int mode; @@ -229,7 +229,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts) notrace static int __always_inline do_monotonic(struct timespec *ts) { - unsigned long seq; + unsigned int seq; u64 ns; int mode; @@ -250,7 +250,7 @@ notrace static int __always_inline do_monotonic(struct timespec *ts) notrace static void do_realtime_coarse(struct timespec *ts) { - unsigned long seq; + unsigned int seq; do { seq = gtod_read_begin(gtod); ts->tv_sec = gtod->wall_time_coarse_sec; @@ -260,7 +260,7 @@ notrace static void do_realtime_coarse(struct timespec *ts) notrace static void do_monotonic_coarse(struct timespec *ts) { - unsigned long seq; + unsigned int seq; do { seq = gtod_read_begin(gtod); ts->tv_sec = gtod->monotonic_time_coarse_sec; diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 53748541c487..91cad1f01027 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -15,9 +15,9 @@ typedef unsigned long gtod_long_t; * so be carefull by modifying this structure. */ struct vsyscall_gtod_data { - unsigned seq; + unsigned int seq; - int vclock_mode; + int vclock_mode; u64 cycle_last; u64 mask; u32 mult; @@ -44,9 +44,9 @@ static inline bool vclock_was_used(int vclock) return READ_ONCE(vclocks_used) & (1 << vclock); } -static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) +static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) { - unsigned ret; + unsigned int ret; repeat: ret = READ_ONCE(s->seq); @@ -59,7 +59,7 @@ repeat: } static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned start) + unsigned int start) { smp_rmb(); return unlikely(s->seq != start); -- cgit v1.2.3 From 49116f2081eeaf75316956705d46602b7eb735cc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:38 +0200 Subject: x86/vdso: Introduce and use vgtod_ts It's desired to support more clocks in the VDSO, e.g. CLOCK_TAI. 
This results either in indirect calls due to the larger switch case, which then requires retpolines, or, when the compiler is forced to avoid jump tables, in even more conditionals. To avoid both variants, which are bad for performance, the high resolution functions and the coarse grained functions will be collapsed into one for each. That requires storing the clock specific base time in an array. Introduce struct vgtod_ts for storage and convert the data store, the update function and the individual clock functions over to use it. The new storage no longer uses gtod_long_t for seconds depending on a 32 or 64 bit compile, because this needs to be the full 64bit value even for 32bit when a Y2038 function is added. No point in keeping the distinction alive in the internal representation. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.324679401@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 24 ++++++++++------- arch/x86/entry/vsyscall/vsyscall_gtod.c | 47 +++++++++++++++++---------------- arch/x86/include/asm/vgtod.h | 38 ++++++++++++++------------ 3 files changed, 60 insertions(+), 49 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index b50ee064beff..2c73e7f57316 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -208,6 +208,7 @@ notrace static inline u64 vgetsns(int *mode) /* Code size doesn't matter (vdso is 4k anyway) and this is faster.
*/ notrace static int __always_inline do_realtime(struct timespec *ts) { + struct vgtod_ts *base = &gtod->basetime[CLOCK_REALTIME]; unsigned int seq; u64 ns; int mode; @@ -215,8 +216,8 @@ notrace static int __always_inline do_realtime(struct timespec *ts) do { seq = gtod_read_begin(gtod); mode = gtod->vclock_mode; - ts->tv_sec = gtod->wall_time_sec; - ns = gtod->wall_time_snsec; + ts->tv_sec = base->sec; + ns = base->nsec; ns += vgetsns(&mode); ns >>= gtod->shift; } while (unlikely(gtod_read_retry(gtod, seq))); @@ -229,6 +230,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts) notrace static int __always_inline do_monotonic(struct timespec *ts) { + struct vgtod_ts *base = &gtod->basetime[CLOCK_MONOTONIC]; unsigned int seq; u64 ns; int mode; @@ -236,8 +238,8 @@ notrace static int __always_inline do_monotonic(struct timespec *ts) do { seq = gtod_read_begin(gtod); mode = gtod->vclock_mode; - ts->tv_sec = gtod->monotonic_time_sec; - ns = gtod->monotonic_time_snsec; + ts->tv_sec = base->sec; + ns = base->nsec; ns += vgetsns(&mode); ns >>= gtod->shift; } while (unlikely(gtod_read_retry(gtod, seq))); @@ -250,21 +252,25 @@ notrace static int __always_inline do_monotonic(struct timespec *ts) notrace static void do_realtime_coarse(struct timespec *ts) { + struct vgtod_ts *base = &gtod->basetime[CLOCK_REALTIME_COARSE]; unsigned int seq; + do { seq = gtod_read_begin(gtod); - ts->tv_sec = gtod->wall_time_coarse_sec; - ts->tv_nsec = gtod->wall_time_coarse_nsec; + ts->tv_sec = base->sec; + ts->tv_nsec = base->nsec; } while (unlikely(gtod_read_retry(gtod, seq))); } notrace static void do_monotonic_coarse(struct timespec *ts) { + struct vgtod_ts *base = &gtod->basetime[CLOCK_MONOTONIC_COARSE]; unsigned int seq; + do { seq = gtod_read_begin(gtod); - ts->tv_sec = gtod->monotonic_time_coarse_sec; - ts->tv_nsec = gtod->monotonic_time_coarse_nsec; + ts->tv_sec = base->sec; + ts->tv_nsec = base->nsec; } while (unlikely(gtod_read_retry(gtod, seq))); } @@ -320,7 +326,7 @@ int gettimeofday(struct timeval *, struct timezone *) notrace time_t __vdso_time(time_t *t) { /* This is atomic on x86 so we don't need any locks. */ - time_t result = READ_ONCE(gtod->wall_time_sec); + time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); if (t) *t = result; diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c index e1216dd95c04..31b9e5e0cfdf 100644 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c @@ -31,6 +31,8 @@ void update_vsyscall(struct timekeeper *tk) { int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; + struct vgtod_ts *base; + u64 nsec; /* Mark the new vclock used.
*/ BUILD_BUG_ON(VCLOCK_MAX >= 32); @@ -45,34 +47,33 @@ void update_vsyscall(struct timekeeper *tk) vdata->mult = tk->tkr_mono.mult; vdata->shift = tk->tkr_mono.shift; - vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec; + base = &vdata->basetime[CLOCK_REALTIME]; + base->sec = tk->xtime_sec; + base->nsec = tk->tkr_mono.xtime_nsec; - vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - while (vdata->monotonic_time_snsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - vdata->monotonic_time_snsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - vdata->monotonic_time_sec++; + base = &vdata->basetime[CLOCK_MONOTONIC]; + base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + nsec = tk->tkr_mono.xtime_nsec; + nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; + base->sec++; } + base->nsec = nsec; - vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); + base = &vdata->basetime[CLOCK_REALTIME_COARSE]; + base->sec = tk->xtime_sec; + base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_coarse_nsec = vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec; - - while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) { - vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC; - vdata->monotonic_time_coarse_sec++; + base = &vdata->basetime[CLOCK_MONOTONIC_COARSE]; + base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec += tk->wall_to_monotonic.tv_nsec; + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + base->sec++; } + base->nsec = nsec; gtod_write_end(vdata); } diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 91cad1f01027..10e534a1a51a 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,33 +5,37 @@ #include <linux/compiler.h> #include <linux/clocksource.h> +#include <uapi/linux/time.h> + #ifdef BUILD_VDSO32_64 typedef u64 gtod_long_t; #else typedef unsigned long gtod_long_t; #endif + +struct vgtod_ts { + u64 sec; + u64 nsec; +}; + +#define VGTOD_BASES (CLOCK_MONOTONIC_COARSE + 1) +#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC)) +#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) + /* * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time * so be carefull by modifying this structure.
*/ struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - /* open coded 'struct timespec' */ - u64 wall_time_snsec; - gtod_long_t wall_time_sec; - gtod_long_t monotonic_time_sec; - u64 monotonic_time_snsec; - gtod_long_t wall_time_coarse_sec; - gtod_long_t wall_time_coarse_nsec; - gtod_long_t monotonic_time_coarse_sec; - gtod_long_t monotonic_time_coarse_nsec; + unsigned int seq; + + int vclock_mode; + u64 cycle_last; + u64 mask; + u32 mult; + u32 shift; + + struct vgtod_ts basetime[VGTOD_BASES]; int tz_minuteswest; int tz_dsttime; -- cgit v1.2.3 From e9a62f76f93c6f28b6c7e908eb12e5f1313870a2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:39 +0200 Subject: x86/vdso: Collapse high resolution functions do_realtime() and do_monotonic() are now the same except for the storage array index. Hand the index in as an argument and collapse the functions. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.407955860@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 2c73e7f57316..1351b76638fb 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -205,35 +205,12 @@ notrace static inline u64 vgetsns(int *mode) return v * gtod->mult; } -/* Code size doesn't matter (vdso is 4k anyway) and this is faster. 
*/ -notrace static int __always_inline do_realtime(struct timespec *ts) +notrace static int do_hres(clockid_t clk, struct timespec *ts) { - struct vgtod_ts *base = &gtod->basetime[CLOCK_REALTIME]; + struct vgtod_ts *base = &gtod->basetime[clk]; unsigned int seq; - u64 ns; int mode; - - do { - seq = gtod_read_begin(gtod); - mode = gtod->vclock_mode; - ts->tv_sec = base->sec; - ns = base->nsec; - ns += vgetsns(&mode); - ns >>= gtod->shift; - } while (unlikely(gtod_read_retry(gtod, seq))); - - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return mode; -} - -notrace static int __always_inline do_monotonic(struct timespec *ts) -{ - struct vgtod_ts *base = &gtod->basetime[CLOCK_MONOTONIC]; - unsigned int seq; + u64 ns; - int mode; do { seq = gtod_read_begin(gtod); @@ -278,11 +255,11 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { switch (clock) { case CLOCK_REALTIME: - if (do_realtime(ts) == VCLOCK_NONE) + if (do_hres(CLOCK_REALTIME, ts) == VCLOCK_NONE) goto fallback; break; case CLOCK_MONOTONIC: - if (do_monotonic(ts) == VCLOCK_NONE) + if (do_hres(CLOCK_MONOTONIC, ts) == VCLOCK_NONE) goto fallback; break; case CLOCK_REALTIME_COARSE: @@ -305,7 +282,9 @@ int clock_gettime(clockid_t, struct timespec *) notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { if (likely(tv != NULL)) { - if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) + struct timespec *ts = (struct timespec *) tv; + + if (unlikely(do_hres(CLOCK_REALTIME, ts) == VCLOCK_NONE)) return vdso_fallback_gtod(tv, tz); tv->tv_usec /= 1000; } -- cgit v1.2.3 From 6deec5bdef4518bd6524a47be9d621ff650d3ba4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:40 +0200 Subject: x86/vdso: Collapse coarse functions do_realtime_coarse() and do_monotonic_coarse() are now the same except for the storage array index. Hand the index in as an argument and collapse the functions. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y.
Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.490733779@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 1351b76638fb..b27dea0e23af 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -227,21 +227,9 @@ notrace static int do_hres(clockid_t clk, struct timespec *ts) return mode; } -notrace static void do_realtime_coarse(struct timespec *ts) +notrace static void do_coarse(clockid_t clk, struct timespec *ts) { - struct vgtod_ts *base = >od->basetime[CLOCK_REALTIME_COARSE]; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - ts->tv_sec = base->sec; - ts->tv_nsec = base->nsec; - } while (unlikely(gtod_read_retry(gtod, seq))); -} - -notrace static void do_monotonic_coarse(struct timespec *ts) -{ - struct vgtod_ts *base = >od->basetime[CLOCK_MONOTONIC_COARSE]; + struct vgtod_ts *base = >od->basetime[clk]; unsigned int seq; do { @@ -263,10 +251,10 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) goto fallback; break; case CLOCK_REALTIME_COARSE: - do_realtime_coarse(ts); + do_coarse(CLOCK_REALTIME_COARSE, ts); break; case CLOCK_MONOTONIC_COARSE: - do_monotonic_coarse(ts); + do_coarse(CLOCK_MONOTONIC_COARSE, ts); break; default: goto fallback; -- cgit v1.2.3 From f3e839384164cf86faedd185b8f6024f73050f5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:41 +0200 Subject: x86/vdso: Replace the clockid switch case Now that the time getter functions use the clockid as index into the storage array for the base time access, the switch case can be replaced. - Check for clockid >= MAX_CLOCKS and for negative clockid (CPU/FD) first and call the fallback function right away. - After establishing that clockid is < MAX_CLOCKS, convert the clockid to a bitmask - Check for the supported high resolution and coarse functions by anding the bitmask of supported clocks and check whether a bit is set. This completely avoids jump tables, reduces the number of conditionals and makes the VDSO extensible for other clock ids. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. 
Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.574315796@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 38 +++++++++++++++++------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index b27dea0e23af..672e50e35d6c 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -241,29 +241,27 @@ notrace static void do_coarse(clockid_t clk, struct timespec *ts) notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - switch (clock) { - case CLOCK_REALTIME: - if (do_hres(CLOCK_REALTIME, ts) == VCLOCK_NONE) - goto fallback; - break; - case CLOCK_MONOTONIC: - if (do_hres(CLOCK_MONOTONIC, ts) == VCLOCK_NONE) - goto fallback; - break; - case CLOCK_REALTIME_COARSE: - do_coarse(CLOCK_REALTIME_COARSE, ts); - break; - case CLOCK_MONOTONIC_COARSE: - do_coarse(CLOCK_MONOTONIC_COARSE, ts); - break; - default: - goto fallback; - } + unsigned int msk; - return 0; -fallback: + /* Sort out negative (CPU/FD) and invalid clocks */ + if (unlikely((unsigned int) clock >= MAX_CLOCKS)) + return vdso_fallback_gettime(clock, ts); + + /* + * Convert the clockid to a bitmask and use it to check which + * clocks are handled in the VDSO directly. + */ + msk = 1U << clock; + if (likely(msk & VGTOD_HRES)) { + if (do_hres(clock, ts) != VCLOCK_NONE) + return 0; + } else if (msk & VGTOD_COARSE) { + do_coarse(clock, ts); + return 0; + } return vdso_fallback_gettime(clock, ts); } + int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); -- cgit v1.2.3 From 4f72adc5068294268387a81a6bf91d9bb07ecc5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:42 +0200 Subject: x86/vdso: Simplify the invalid vclock case The code flow for the vclocks is convoluted as it requires the vclocks which can be invalidated separately from the vsyscall_gtod_data sequence to store the fact in a separate variable. That's inefficient. Restructure the code so the vclock readout returns cycles and the conversion to nanoseconds is handled at the call site. If the clock gets invalidated or vclock is already VCLOCK_NONE, return U64_MAX as the cycle value, which is invalid for all clocks and leave the sequence loop immediately in that case by calling the fallback function directly. This allows to remove the gettimeofday fallback as it now uses the clock_gettime() fallback and does the nanoseconds to microseconds conversion in the same way as it does when the vclock is functional. It does not make a difference whether the division by 1000 happens in the kernel fallback or in userspace. Generates way better code and gains a few cycles back. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. 
Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.657928937@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 82 +++++++++--------------------------- 1 file changed, 21 insertions(+), 61 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 672e50e35d6c..40105024a210 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -49,17 +49,6 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } -notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) -{ - long ret; - - asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : - "memory", "rcx", "r11"); - return ret; -} - - #else notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) @@ -77,21 +66,6 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) return ret; } -notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) -{ - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[tv], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*tv), "=m" (*tz) - : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) - : "memory", "edx"); - return ret; -} - #endif #ifdef CONFIG_PARAVIRT_CLOCK @@ -100,7 +74,7 @@ static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) return (const struct pvclock_vsyscall_time_info *)&pvclock_page; } -static notrace u64 vread_pvclock(int *mode) +static notrace u64 vread_pvclock(void) { const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; u64 ret; @@ -132,10 +106,8 @@ static notrace u64 vread_pvclock(int *mode) do { version = pvclock_read_begin(pvti); - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { - *mode = VCLOCK_NONE; - return 0; - } + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); } while (pvclock_read_retry(pvti, version)); @@ -150,17 +122,12 @@ static notrace u64 vread_pvclock(int *mode) } #endif #ifdef CONFIG_HYPERV_TSCPAGE -static notrace u64 vread_hvclock(int *mode) +static notrace u64 vread_hvclock(void) { const struct ms_hyperv_tsc_page *tsc_pg = (const struct ms_hyperv_tsc_page *)&hvclock_page; - u64 current_tick = hv_read_tsc_page(tsc_pg); - - if (current_tick != U64_MAX) - return current_tick; - *mode = VCLOCK_NONE; - return 0; + return hv_read_tsc_page(tsc_pg); } #endif @@ -184,47 +151,42 @@ notrace static u64 vread_tsc(void) return last; } -notrace static inline u64 vgetsns(int *mode) +notrace static inline u64 vgetcyc(int mode) { - u64 v; - cycles_t cycles; - - if (gtod->vclock_mode == VCLOCK_TSC) - cycles = vread_tsc(); + if (mode == VCLOCK_TSC) + return vread_tsc(); #ifdef CONFIG_PARAVIRT_CLOCK - else if (gtod->vclock_mode == VCLOCK_PVCLOCK) - cycles = vread_pvclock(mode); + else if (mode == VCLOCK_PVCLOCK) + return vread_pvclock(); #endif #ifdef CONFIG_HYPERV_TSCPAGE - else if (gtod->vclock_mode == VCLOCK_HVCLOCK) - cycles = vread_hvclock(mode); + else if (mode == VCLOCK_HVCLOCK) + return vread_hvclock(); #endif - else - return 0; - v = cycles - gtod->cycle_last; - return v * gtod->mult; + return U64_MAX; } notrace static int do_hres(clockid_t clk, struct timespec *ts) { struct vgtod_ts *base = >od->basetime[clk]; unsigned int seq; - 
int mode; - u64 ns; + u64 cycles, ns; do { seq = gtod_read_begin(gtod); - mode = gtod->vclock_mode; ts->tv_sec = base->sec; ns = base->nsec; - ns += vgetsns(&mode); + cycles = vgetcyc(gtod->vclock_mode); + if (unlikely((s64)cycles < 0)) + return vdso_fallback_gettime(clk, ts); + ns += (cycles - gtod->cycle_last) * gtod->mult; ns >>= gtod->shift; } while (unlikely(gtod_read_retry(gtod, seq))); ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); ts->tv_nsec = ns; - return mode; + return 0; } notrace static void do_coarse(clockid_t clk, struct timespec *ts) @@ -253,8 +215,7 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) */ msk = 1U << clock; if (likely(msk & VGTOD_HRES)) { - if (do_hres(clock, ts) != VCLOCK_NONE) - return 0; + return do_hres(clock, ts); } else if (msk & VGTOD_COARSE) { do_coarse(clock, ts); return 0; @@ -270,8 +231,7 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) if (likely(tv != NULL)) { struct timespec *ts = (struct timespec *) tv; - if (unlikely(do_hres(CLOCK_REALTIME, ts) == VCLOCK_NONE)) - return vdso_fallback_gtod(tv, tz); + do_hres(CLOCK_REALTIME, ts); tv->tv_usec /= 1000; } if (unlikely(tz != NULL)) { -- cgit v1.2.3 From 3e89bf35ebf59c12e8c1476f6681fae0ebdcb2a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:43 +0200 Subject: x86/vdso: Move cycle_last handling into the caller Dereferencing gtod->cycle_last all over the place and doing the cycles < last comparison in the vclock read functions generates horrible code. Doing it at the call site is much better and gains a few cycles both for TSC and pvclock. Caveat: This adds the comparison to the hyperv vclock as well, but I have no way to test that. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Matt Rickard Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y.
Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.741440803@linutronix.de --- arch/x86/entry/vdso/vclock_gettime.c | 39 +++++++----------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 40105024a210..b7ccbff26a3f 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -77,9 +77,8 @@ static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) static notrace u64 vread_pvclock(void) { const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; - u64 ret; - u64 last; u32 version; + u64 ret; /* * Note: The kernel and hypervisor must guarantee that cpu ID @@ -112,13 +111,7 @@ static notrace u64 vread_pvclock(void) ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); } while (pvclock_read_retry(pvti, version)); - /* refer to vread_tsc() comment for rationale */ - last = gtod->cycle_last; - - if (likely(ret >= last)) - return ret; - - return last; + return ret; } #endif #ifdef CONFIG_HYPERV_TSCPAGE @@ -131,30 +124,10 @@ static notrace u64 vread_hvclock(void) } #endif -notrace static u64 vread_tsc(void) -{ - u64 ret = (u64)rdtsc_ordered(); - u64 last = gtod->cycle_last; - - if (likely(ret >= last)) - return ret; - - /* - * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a function of time and the likely is - * very likely) and there's a data dependence, so force GCC - * to generate a branch instead. I don't barrier() because - * we don't actually need a barrier, and if this function - * ever gets inlined it will generate worse code. - */ - asm volatile (""); - return last; -} - notrace static inline u64 vgetcyc(int mode) { if (mode == VCLOCK_TSC) - return vread_tsc(); + return (u64)rdtsc_ordered(); #ifdef CONFIG_PARAVIRT_CLOCK else if (mode == VCLOCK_PVCLOCK) return vread_pvclock(); @@ -169,17 +142,19 @@ notrace static inline u64 vgetcyc(int mode) notrace static int do_hres(clockid_t clk, struct timespec *ts) { struct vgtod_ts *base = >od->basetime[clk]; + u64 cycles, last, ns; unsigned int seq; - u64 cycles, ns; do { seq = gtod_read_begin(gtod); ts->tv_sec = base->sec; ns = base->nsec; + last = gtod->cycle_last; cycles = vgetcyc(gtod->vclock_mode); if (unlikely((s64)cycles < 0)) return vdso_fallback_gettime(clk, ts); - ns += (cycles - gtod->cycle_last) * gtod->mult; + if (cycles > last) + ns += (cycles - last) * gtod->mult; ns >>= gtod->shift; } while (unlikely(gtod_read_retry(gtod, seq))); -- cgit v1.2.3 From 315f28fa3a7bfbbb6233d800dffa327e87f96129 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Sep 2018 14:45:44 +0200 Subject: x66/vdso: Add CLOCK_TAI support With the storage array in place it's now trivial to support CLOCK_TAI in the vdso. Extend the base time storage array and add the update code. Signed-off-by: Thomas Gleixner Tested-by: Matt Rickard Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Stephen Boyd Cc: John Stultz Cc: Florian Weimer Cc: "K. Y. 
Srinivasan" Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Cc: virtualization@lists.linux-foundation.org Cc: Paolo Bonzini Cc: Arnd Bergmann Cc: Juergen Gross Link: https://lkml.kernel.org/r/20180917130707.823878601@linutronix.de --- arch/x86/entry/vsyscall/vsyscall_gtod.c | 4 ++++ arch/x86/include/asm/vgtod.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c index 31b9e5e0cfdf..cfcdba082feb 100644 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c @@ -51,6 +51,10 @@ void update_vsyscall(struct timekeeper *tk) base->sec = tk->xtime_sec; base->nsec = tk->tkr_mono.xtime_nsec; + base = &vdata->basetime[CLOCK_TAI]; + base->sec = tk->xtime_sec + (s64)tk->tai_offset; + base->nsec = tk->tkr_mono.xtime_nsec; + base = &vdata->basetime[CLOCK_MONOTONIC]; base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsec = tk->tkr_mono.xtime_nsec; diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 10e534a1a51a..d17b092b9f1b 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -18,8 +18,8 @@ struct vgtod_ts { u64 nsec; }; -#define VGTOD_BASES (CLOCK_MONOTONIC_COARSE + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC)) +#define VGTOD_BASES (CLOCK_TAI + 1) +#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) #define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) /* -- cgit v1.2.3 From 89fe0a1f1c694a3b0b3cfa8c0952d603753f36df Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Oct 2018 14:44:43 -0700 Subject: x86/vdso: Remove "memory" clobbers in the vDSO syscall fallbacks When a vDSO clock function falls back to the syscall, no special barriers or ordering is needed, and the syscall fallbacks don't clobber any memory that is not explicitly listed in the asm constraints. Remove the "memory" clobber. This causes minor changes to the generated code, but otherwise has no obvious performance impact. I think it's nice to have, though, since it may help the optimizer in the future. Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/3a7438f5fb2422ed881683d2ccffd7f987b2dc44.1538689401.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index b7ccbff26a3f..18c8a78d1ec9 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -45,7 +45,7 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) long ret; asm ("syscall" : "=a" (ret), "=m" (*ts) : "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : - "memory", "rcx", "r11"); + "rcx", "r11"); return ret; } @@ -62,7 +62,7 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) "mov %%edx, %%ebx \n" : "=a" (ret), "=m" (*ts) : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) - : "memory", "edx"); + : "edx"); return ret; } -- cgit v1.2.3 From bcc4a62a73cb65327d7268fbfa3a786d603f52dc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Oct 2018 14:44:45 -0700 Subject: x86/vdso: Document vgtod_ts better After reading do_hres() and do_course() and scratching my head a bit, I figured out why the arithmetic is strange. Document it. 
Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/f66f53d81150bbad47d7b282c9207a71a3ce1c16.1538689401.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vgtod.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index d17b092b9f1b..69d05c6d47f5 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -13,6 +13,15 @@ typedef u64 gtod_long_t; typedef unsigned long gtod_long_t; #endif +/* + * There is one of these objects in the vvar page for each + * vDSO-accelerated clockid. For high-resolution clocks, this encodes + * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse + * clocks, this encodes the actual time. + * + * To confuse the reader, for high-resolution clocks, nsec is left-shifted + * by vsyscall_gtod_data.shift. + */ struct vgtod_ts { u64 sec; u64 nsec; }; -- cgit v1.2.3 From 99c19e6a8fe4a95fa0dac191207a1d40461b1604 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 5 Oct 2018 11:02:43 -0700 Subject: x86/vdso: Rearrange do_hres() to improve code generation vgetcyc() is full of barriers, so fetching values out of the vvar page before vgetcyc() for use after vgetcyc() results in poor code generation. Put vgetcyc() first to avoid this problem. Also, pull the tv_sec division into the loop and put all the ts writes together. The old code wrote ts->tv_sec on each iteration before the syscall fallback check and then added in the offset afterwards, which forced the compiler to pointlessly copy base->sec to ts->tv_sec on each iteration. The new version seems to generate sensible code. Saves several cycles. With this patch applied, the result is faster than before the clock_gettime() rewrite. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/3c05644d010b72216aa286a6d20b5078d5fae5cd.1538762487.git.luto@kernel.org --- arch/x86/entry/vdso/vclock_gettime.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 18c8a78d1ec9..007b3fe9d727 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -142,23 +142,27 @@ notrace static inline u64 vgetcyc(int mode) notrace static int do_hres(clockid_t clk, struct timespec *ts) { struct vgtod_ts *base = &gtod->basetime[clk]; - u64 cycles, last, ns; + u64 cycles, last, sec, ns; unsigned int seq; do { seq = gtod_read_begin(gtod); - ts->tv_sec = base->sec; + cycles = vgetcyc(gtod->vclock_mode); ns = base->nsec; last = gtod->cycle_last; - cycles = vgetcyc(gtod->vclock_mode); if (unlikely((s64)cycles < 0)) return vdso_fallback_gettime(clk, ts); if (cycles > last) ns += (cycles - last) * gtod->mult; ns >>= gtod->shift; + sec = base->sec; } while (unlikely(gtod_read_retry(gtod, seq))); - ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + /* + * Do this outside the loop: a race inside the loop could result + * in __iter_div_u64_rem() being extremely slow. + */ + ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); ts->tv_nsec = ns; return 0; -- cgit v1.2.3
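
The net effect of the series is observable from userspace. The following standalone program is an illustrative sketch, not part of the series; the file name, iteration count and any numbers it prints are arbitrary. It times clock_gettime() for each clockid the rewritten vDSO handles. On a kernel with this series applied, CLOCK_TAI goes through the same do_hres() fast path as CLOCK_REALTIME and CLOCK_MONOTONIC instead of falling back to the real syscall, which shows up as a comparable per-call cost.

/* Hypothetical file name; build with: gcc -O2 -o vdso-bench vdso-bench.c */
#include <stdio.h>
#include <time.h>

static void bench(clockid_t clk, const char *name)
{
	struct timespec start, end, ts;
	const long iters = 1000000;
	long long ns;
	long i;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < iters; i++)
		clock_gettime(clk, &ts);
	clock_gettime(CLOCK_MONOTONIC, &end);

	ns = (end.tv_sec - start.tv_sec) * 1000000000LL +
	     (end.tv_nsec - start.tv_nsec);
	printf("%-24s %5lld ns/call\n", name, ns / iters);
}

int main(void)
{
	/* All of these take the vDSO path on a kernel with this series. */
	bench(CLOCK_REALTIME, "CLOCK_REALTIME");
	bench(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
	bench(CLOCK_TAI, "CLOCK_TAI");
	bench(CLOCK_REALTIME_COARSE, "CLOCK_REALTIME_COARSE");
	bench(CLOCK_MONOTONIC_COARSE, "CLOCK_MONOTONIC_COARSE");
	return 0;
}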
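The vgtod_ts documentation added above is easier to follow with the arithmetic written out. The sketch below re-implements the do_hres() fixed-point math in plain userspace C with made-up example values; the constants and the assumption of a roughly 3.2 GHz TSC are illustrative only, and the seqcount retry loop and syscall fallback that the kernel performs are omitted here.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Snapshot as update_vsyscall() would publish it (example values): */
	const uint32_t shift = 24;            /* fixed-point shift */
	const uint32_t mult = 5242880;        /* cycles -> (ns << shift), ~3.2 GHz TSC */
	const uint64_t cycle_last = 1000000;  /* TSC count at the last update */
	const uint64_t base_sec = 1538762487; /* CLOCK_REALTIME seconds at cycle_last */
	const uint64_t base_nsec = 123456789ULL << shift; /* nsec stored left-shifted */

	uint64_t cycles = 1002000;            /* current TSC readout */
	uint64_t ns = base_nsec;

	if (cycles > cycle_last)              /* the cycle_last check done by the caller */
		ns += (cycles - cycle_last) * mult;
	ns >>= shift;                         /* back to plain nanoseconds */

	/* The kernel uses __iter_div_u64_rem(); plain division suffices here. */
	printf("%llu.%09llu\n",
	       (unsigned long long)(base_sec + ns / 1000000000ULL),
	       (unsigned long long)(ns % 1000000000ULL));
	return 0;
}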