Diffstat (limited to 'lib/vdso')
-rw-r--r--	lib/vdso/Kconfig        |   6
-rw-r--r--	lib/vdso/gettimeofday.c | 204
2 files changed, 170 insertions(+), 40 deletions(-)
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
index 9fe698ff62ec..d883ac299508 100644
--- a/lib/vdso/Kconfig
+++ b/lib/vdso/Kconfig
@@ -24,4 +24,10 @@ config GENERIC_COMPAT_VDSO
 	help
 	  This config option enables the compat VDSO layer.
 
+config GENERIC_VDSO_TIME_NS
+	bool
+	help
+	  Selected by architectures which support time namespaces in the
+	  VDSO
+
 endif
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 42bd8ab955fa..f8b8ec5e63ac 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -38,12 +38,22 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
 }
 #endif
 
-static int do_hres(const struct vdso_data *vd, clockid_t clk,
-		   struct __kernel_timespec *ts)
+#ifdef CONFIG_TIME_NS
+static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+			  struct __kernel_timespec *ts)
 {
-	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
-	u64 cycles, last, sec, ns;
+	const struct vdso_data *vd = __arch_get_timens_vdso_data();
+	const struct timens_offset *offs = &vdns->offset[clk];
+	const struct vdso_timestamp *vdso_ts;
+	u64 cycles, last, ns;
 	u32 seq;
+	s64 sec;
+
+	if (clk != CLOCK_MONOTONIC_RAW)
+		vd = &vd[CS_HRES_COARSE];
+	else
+		vd = &vd[CS_RAW];
+	vdso_ts = &vd->basetime[clk];
 
 	do {
 		seq = vdso_read_begin(vd);
@@ -58,6 +68,10 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
 		sec = vdso_ts->sec;
 	} while (unlikely(vdso_read_retry(vd, seq)));
 
+	/* Add the namespace offset */
+	sec += offs->sec;
+	ns += offs->nsec;
+
 	/*
 	 * Do this outside the loop: a race inside the loop could result
 	 * in __iter_div_u64_rem() being extremely slow.
@@ -67,18 +81,128 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
 
 	return 0;
 }
+#else
+static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+{
+	return NULL;
+}
+
+static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
+			  struct __kernel_timespec *ts)
+{
+	return -EINVAL;
+}
+#endif
 
-static void do_coarse(const struct vdso_data *vd, clockid_t clk,
-		      struct __kernel_timespec *ts)
+static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
+				   struct __kernel_timespec *ts)
 {
 	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+	u64 cycles, last, sec, ns;
 	u32 seq;
 
 	do {
+		/*
+		 * Open coded to handle VCLOCK_TIMENS. Time namespace
+		 * enabled tasks have a special VVAR page installed which
+		 * has vd->seq set to 1 and vd->clock_mode set to
+		 * VCLOCK_TIMENS. For non time namespace affected tasks
+		 * this does not affect performance because if vd->seq is
+		 * odd, i.e. a concurrent update is in progress the extra
+		 * check for vd->clock_mode is just a few extra
+		 * instructions while spin waiting for vd->seq to become
+		 * even again.
+		 */
+		while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) {
+			if (IS_ENABLED(CONFIG_TIME_NS) &&
+			    vd->clock_mode == VCLOCK_TIMENS)
+				return do_hres_timens(vd, clk, ts);
+			cpu_relax();
+		}
+		smp_rmb();
+
+		cycles = __arch_get_hw_counter(vd->clock_mode);
+		ns = vdso_ts->nsec;
+		last = vd->cycle_last;
+		if (unlikely((s64)cycles < 0))
+			return -1;
+
+		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+		ns >>= vd->shift;
+		sec = vdso_ts->sec;
+	} while (unlikely(vdso_read_retry(vd, seq)));
+
+	/*
+	 * Do this outside the loop: a race inside the loop could result
+	 * in __iter_div_u64_rem() being extremely slow.
+	 */
+	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+			    struct __kernel_timespec *ts)
+{
+	const struct vdso_data *vd = __arch_get_timens_vdso_data();
+	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+	const struct timens_offset *offs = &vdns->offset[clk];
+	u64 nsec;
+	s64 sec;
+	s32 seq;
+
+	do {
 		seq = vdso_read_begin(vd);
+		sec = vdso_ts->sec;
+		nsec = vdso_ts->nsec;
+	} while (unlikely(vdso_read_retry(vd, seq)));
+
+	/* Add the namespace offset */
+	sec += offs->sec;
+	nsec += offs->nsec;
+
+	/*
+	 * Do this outside the loop: a race inside the loop could result
+	 * in __iter_div_u64_rem() being extremely slow.
+	 */
+	ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
+	ts->tv_nsec = nsec;
+	return 0;
+}
+#else
+static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
+			    struct __kernel_timespec *ts)
+{
+	return -1;
+}
+#endif
+
+static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
+				     struct __kernel_timespec *ts)
+{
+	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+	u32 seq;
+
+	do {
+		/*
+		 * Open coded to handle VCLOCK_TIMENS. See comment in
+		 * do_hres().
+		 */
+		while ((seq = READ_ONCE(vd->seq)) & 1) {
+			if (IS_ENABLED(CONFIG_TIME_NS) &&
+			    vd->clock_mode == VCLOCK_TIMENS)
+				return do_coarse_timens(vd, clk, ts);
+			cpu_relax();
+		}
+		smp_rmb();
+
 		ts->tv_sec = vdso_ts->sec;
 		ts->tv_nsec = vdso_ts->nsec;
 	} while (unlikely(vdso_read_retry(vd, seq)));
+
+	return 0;
 }
 
 static __maybe_unused int
@@ -96,15 +220,16 @@ __cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
 	 * clocks are handled in the VDSO directly.
 	 */
 	msk = 1U << clock;
-	if (likely(msk & VDSO_HRES)) {
-		return do_hres(&vd[CS_HRES_COARSE], clock, ts);
-	} else if (msk & VDSO_COARSE) {
-		do_coarse(&vd[CS_HRES_COARSE], clock, ts);
-		return 0;
-	} else if (msk & VDSO_RAW) {
-		return do_hres(&vd[CS_RAW], clock, ts);
-	}
-	return -1;
+	if (likely(msk & VDSO_HRES))
+		vd = &vd[CS_HRES_COARSE];
+	else if (msk & VDSO_COARSE)
+		return do_coarse(&vd[CS_HRES_COARSE], clock, ts);
+	else if (msk & VDSO_RAW)
+		vd = &vd[CS_RAW];
+	else
+		return -1;
+
+	return do_hres(vd, clock, ts);
 }
 
 static __maybe_unused int
@@ -117,6 +242,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
 	return 0;
 }
 
+#ifdef BUILD_VDSO32
 static __maybe_unused int
 __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
 {
@@ -125,20 +251,16 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
 
 	ret = __cvdso_clock_gettime_common(clock, &ts);
 
-#ifdef VDSO_HAS_32BIT_FALLBACK
 	if (unlikely(ret))
 		return clock_gettime32_fallback(clock, res);
-#else
-	if (unlikely(ret))
-		ret = clock_gettime_fallback(clock, &ts);
-#endif
 
-	if (likely(!ret)) {
-		res->tv_sec = ts.tv_sec;
-		res->tv_nsec = ts.tv_nsec;
-	}
+	/* For ret == 0 */
+	res->tv_sec = ts.tv_sec;
+	res->tv_nsec = ts.tv_nsec;
+
 	return ret;
 }
+#endif /* BUILD_VDSO32 */
 
 static __maybe_unused int
 __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
@@ -156,6 +278,10 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 	}
 
 	if (unlikely(tz != NULL)) {
+		if (IS_ENABLED(CONFIG_TIME_NS) &&
+		    vd->clock_mode == VCLOCK_TIMENS)
+			vd = __arch_get_timens_vdso_data();
+
 		tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
 		tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
 	}
@@ -167,7 +293,12 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
 static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
 {
 	const struct vdso_data *vd = __arch_get_vdso_data();
-	__kernel_old_time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
+	__kernel_old_time_t t;
+
+	if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS)
+		vd = __arch_get_timens_vdso_data();
+
+	t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
 
 	if (time)
 		*time = t;
@@ -181,7 +312,6 @@ static __maybe_unused int
 __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
 {
 	const struct vdso_data *vd = __arch_get_vdso_data();
-	u64 hrtimer_res;
 	u32 msk;
 	u64 ns;
 
@@ -189,27 +319,24 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
 	if (unlikely((u32) clock >= MAX_CLOCKS))
 		return -1;
 
-	hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
+	if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VCLOCK_TIMENS)
+		vd = __arch_get_timens_vdso_data();
+
 	/*
 	 * Convert the clockid to a bitmask and use it to check which
 	 * clocks are handled in the VDSO directly.
 	 */
 	msk = 1U << clock;
-	if (msk & VDSO_HRES) {
+	if (msk & (VDSO_HRES | VDSO_RAW)) {
 		/*
 		 * Preserves the behaviour of posix_get_hrtimer_res().
 		 */
-		ns = hrtimer_res;
+		ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
 	} else if (msk & VDSO_COARSE) {
 		/*
 		 * Preserves the behaviour of posix_get_coarse_res().
 		 */
 		ns = LOW_RES_NSEC;
-	} else if (msk & VDSO_RAW) {
-		/*
-		 * Preserves the behaviour of posix_get_hrtimer_res().
-		 */
-		ns = hrtimer_res;
 	} else {
 		return -1;
 	}
@@ -231,6 +358,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
 	return 0;
 }
 
+#ifdef BUILD_VDSO32
 static __maybe_unused int
 __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
 {
@@ -239,18 +367,14 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
 
 	ret = __cvdso_clock_getres_common(clock, &ts);
 
-#ifdef VDSO_HAS_32BIT_FALLBACK
 	if (unlikely(ret))
 		return clock_getres32_fallback(clock, res);
-#else
-	if (unlikely(ret))
-		ret = clock_getres_fallback(clock, &ts);
-#endif
 
-	if (likely(!ret && res)) {
+	if (likely(res)) {
 		res->tv_sec = ts.tv_sec;
 		res->tv_nsec = ts.tv_nsec;
 	}
 	return ret;
 }
+#endif /* BUILD_VDSO32 */
 #endif /* VDSO_HAS_CLOCK_GETRES */
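
A note on the mechanism the do_hres()/do_coarse() comments describe: a task in a time namespace gets a special VVAR page whose seq field is pinned to 1 (odd) and whose clock_mode is VCLOCK_TIMENS, so the usual "wait for an even sequence" loop never terminates for such tasks and the reader is redirected to the *_timens() variant instead. The following standalone C sketch illustrates that read pattern; the struct and constant are hypothetical stand-ins for the real vdso_data, and C11 atomics stand in for the kernel's READ_ONCE()/smp_rmb() machinery.

/*
 * Illustrative userspace sketch of the seqcount read pattern behind
 * vdso_read_begin()/vdso_read_retry(), with the VCLOCK_TIMENS escape
 * open coded as in do_hres(). Not kernel code; names are made up.
 */
#include <stdatomic.h>
#include <stdint.h>

#define FAKE_VCLOCK_TIMENS	3	/* stand-in for VCLOCK_TIMENS */

struct fake_vdata {
	_Atomic uint32_t seq;		/* odd: update in progress */
	uint32_t clock_mode;
	uint64_t sec;
	uint64_t nsec;
};

static int read_time(struct fake_vdata *vd, uint64_t *sec, uint64_t *nsec)
{
	uint32_t seq;

	do {
		/* Spin while an update is in progress (seq odd) ... */
		while ((seq = atomic_load(&vd->seq)) & 1) {
			/* ... unless seq is pinned odd by a timens page */
			if (vd->clock_mode == FAKE_VCLOCK_TIMENS)
				return -1;	/* caller takes timens path */
		}
		*sec = vd->sec;
		*nsec = vd->nsec;
		/* Retry if the writer ran while we were reading */
	} while (atomic_load(&vd->seq) != seq);

	return 0;
}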
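The timens helpers add offs->sec/offs->nsec only after the seqcount loop has produced a consistent snapshot, which can leave the nanosecond part at or above NSEC_PER_SEC. That is why both helpers normalize with __iter_div_u64_rem() outside the loop, where a seqcount retry can no longer inflate the dividend. A plain C sketch of that normalization, with made-up values:

/*
 * Sketch of the normalization __iter_div_u64_rem() performs after the
 * namespace offset is added. Values are invented for the example; the
 * iterative subtraction is cheap because the sum overflows one second
 * at most a handful of times.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	int64_t  sec  = 100;		/* vdso_ts->sec */
	uint64_t nsec = 900000000;	/* vdso_ts->nsec, already < 1s */

	sec  += 5;			/* offs->sec  (hypothetical) */
	nsec += 300000000;		/* offs->nsec (hypothetical) */

	/* Equivalent of __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec) */
	while (nsec >= NSEC_PER_SEC) {
		nsec -= NSEC_PER_SEC;
		sec++;
	}

	printf("%lld.%09llu\n", (long long)sec,
	       (unsigned long long)nsec);	/* prints 106.200000000 */
	return 0;
}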
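For context, here is how userspace would exercise this path once the rest of the time namespace series is in place. The CLONE_NEWTIME flag, the /proc/self/timens_offsets file, and its "clockname sec nsec" format belong to that later ABI (merged in Linux 5.6), not to this preparation patch, so treat this as a sketch under those assumptions; it also needs CAP_SYS_ADMIN, and error handling is minimal.

/* Sketch: observing a CLOCK_MONOTONIC namespace offset via the vDSO */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif

int main(void)
{
	struct timespec ts;
	int fd;

	if (unshare(CLONE_NEWTIME)) {
		perror("unshare(CLONE_NEWTIME)");
		return 1;
	}

	/* Shift CLOCK_MONOTONIC by +600s for future children */
	fd = open("/proc/self/timens_offsets", O_WRONLY);
	if (fd < 0 || write(fd, "monotonic 600 0\n", 16) != 16) {
		perror("timens_offsets");
		return 1;
	}
	close(fd);

	/* Only tasks created after unshare() enter the new namespace */
	if (fork() == 0) {
		clock_gettime(CLOCK_MONOTONIC, &ts);
		/* Served from the vDSO via the do_hres_timens() path */
		printf("child:  %lld\n", (long long)ts.tv_sec);
		_exit(0);
	}
	wait(NULL);

	clock_gettime(CLOCK_MONOTONIC, &ts);
	printf("parent: %lld\n", (long long)ts.tv_sec);
	return 0;
}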