From fbd90375d7531927d312766b548376d909811b4d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 13:40:14 +0200 Subject: hrtimer: Remove cb_entry from struct hrtimer It's unused, remove it. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner LKML-Reference: --- include/linux/hrtimer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 54648e625efd..40e7d54fc424 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -91,7 +91,6 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) - * @cb_entry: list head to enqueue an expired timer into the callback list * @start_site: timer statistics field to store the site where the timer * was started * @start_comm: timer statistics field to store the name of the process which @@ -108,7 +107,6 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; - struct list_head cb_entry; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; -- cgit v1.2.3 From 31089c13bcb18d2cd2a3ddfbe3a28666346f237e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 14 Aug 2009 15:47:18 +0200 Subject: timekeeping: Introduce timekeeping_leap_insert Move the adjustment of xtime, wall_to_monotonic and the update of the vsyscall variables to the timekeeping code. Signed-off-by: John Stultz Signed-off-by: Martin Schwidefsky LKML-Reference: <20090814134807.609730216@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 + kernel/time/ntp.c | 7 ++----- kernel/time/timekeeping.c | 7 +++++++ 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index ea16c1a01d51..e7c844558884 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -147,6 +147,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7fc64375ff43..4800f933910e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) case TIME_OK: break; case TIME_INS: - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; + timekeeping_leap_insert(-1); time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) res = HRTIMER_RESTART; break; case TIME_DEL: - xtime.tv_sec++; + timekeeping_leap_insert(1); time_tai--; - wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); @@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_state = TIME_OK; break; } - update_vsyscall(&xtime, clock); write_sequnlock(&xtime_lock); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 02c0b2c9c674..b8b70fb545fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -58,6 +58,13 @@ void update_xtime_cache(u64 nsec) struct clocksource *clock; +/* must hold xtime_lock */ +void timekeeping_leap_insert(int leapsecond) 
+{ + xtime.tv_sec += leapsecond; + wall_to_monotonic.tv_sec -= leapsecond; + update_vsyscall(&xtime, clock); +} #ifdef CONFIG_GENERIC_TIME /** -- cgit v1.2.3 From a0f7d48bfb95a4c5172a2756dbc4b82afc8e9ae4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:19 +0200 Subject: timekeeping: Remove clocksource inline functions The three inline functions clocksource_read, clocksource_enable and clocksource_disable are simple wrappers of an indirect call plus the copy from and to the mult_orig value. The functions are exclusively used by the timekeeping code which has intimate knowledge of the clocksource anyway. Therefore remove the inline functions. No functional change. Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134807.903108946@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 58 --------------------------------------------- kernel/time/timekeeping.c | 41 ++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1219be4fb42e..a1ef46f61c81 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -267,64 +267,6 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) return (u32)tmp; } -/** - * clocksource_read: - Access the clocksource's current cycle value - * @cs: pointer to clocksource being read - * - * Uses the clocksource to return the current cycle_t value - */ -static inline cycle_t clocksource_read(struct clocksource *cs) -{ - return cs->read(cs); -} - -/** - * clocksource_enable: - enable clocksource - * @cs: pointer to clocksource - * - * Enables the specified clocksource. The clocksource callback - * function should start up the hardware and setup mult and field - * members of struct clocksource to reflect hardware capabilities. - */ -static inline int clocksource_enable(struct clocksource *cs) -{ - int ret = 0; - - if (cs->enable) - ret = cs->enable(cs); - - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - cs->mult_orig = cs->mult; - - return ret; -} - -/** - * clocksource_disable: - disable clocksource - * @cs: pointer to clocksource - * - * Disables the specified clocksource. The clocksource callback - * function should power down the now unused hardware block to - * save power. - */ -static inline void clocksource_disable(struct clocksource *cs) -{ - /* - * Save mult_orig in mult so clocksource_enable() can - * restore the value regardless if ->enable() updates - * the value of mult or not. 
- */ - cs->mult = cs->mult_orig; - - if (cs->disable) - cs->disable(cs); -} - /** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b8b70fb545fc..016a2591d719 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -79,7 +79,7 @@ static void clocksource_forward_now(void) cycle_t cycle_now, cycle_delta; s64 nsec; - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; @@ -114,7 +114,7 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -146,7 +146,7 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -186,7 +186,7 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -274,16 +274,29 @@ static void change_clocksource(void) clocksource_forward_now(); - if (clocksource_enable(new)) + if (new->enable && !new->enable(new)) return; + /* + * The frequency may have changed while the clocksource + * was disabled. If so the code in ->enable() must update + * the mult value to reflect the new frequency. Make sure + * mult_orig follows this change. + */ + new->mult_orig = new->mult; new->raw_time = clock->raw_time; old = clock; clock = new; - clocksource_disable(old); + /* + * Save mult_orig in mult so that the value can be restored + * regardless if ->enable() updates the value of mult or not. 
+ */ + old->mult = old->mult_orig; + if (old->disable) + old->disable(old); clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -373,7 +386,7 @@ void getrawmonotonic(struct timespec *ts) seq = read_seqbegin(&xtime_lock); /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -435,9 +448,12 @@ void __init timekeeping_init(void) ntp_init(); clock = clocksource_get_next(); - clocksource_enable(clock); + if (clock->enable) + clock->enable(clock); + /* set mult_orig on enable */ + clock->mult_orig = clock->mult; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; @@ -477,8 +493,7 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -630,7 +645,7 @@ void update_wall_time(void) return; #ifdef CONFIG_GENERIC_TIME - offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; + offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else offset = clock->cycle_interval; #endif -- cgit v1.2.3 From f1b82746c1e93daf24e1ab9bfbd39bcdb2e7018b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:21 +0200 Subject: clocksource: Cleanup clocksource selection If a non high-resolution clocksource is first set as override clock and then registered it becomes active even if the system is in one-shot mode. Move the override check from sysfs_override_clocksource to the clocksource selection. That fixes the bug and simplifies the code. The check in clocksource_register for double registration of the same clocksource is removed without replacement. To find the initial clocksource a new weak function in jiffies.c is defined that returns the jiffies clocksource. The architecture code can then override the weak function with a more suitable clocksource, e.g. the TOD clock on s390. 
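For illustration, the weak-symbol pattern works as follows (both definitions appear in the diff below; at link time a non-weak architecture definition simply replaces the weak generic one):

    /* kernel/time/jiffies.c: generic fallback */
    struct clocksource * __init __weak clocksource_default_clock(void)
    {
            return &clocksource_jiffies;
    }

    /* arch/s390/kernel/time.c: architecture override */
    struct clocksource * __init clocksource_default_clock(void)
    {
            return &clocksource_tod;
    }

This lets timekeeping_init() pick a sensible boot-time clocksource without depending on registration order.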
[ tglx: Folded in a fix from John Stultz ] Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.388024160@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 4 ++ include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 134 +++++++++++++++++--------------------------- kernel/time/jiffies.c | 6 +- kernel/time/timekeeping.c | 4 +- 5 files changed, 64 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c47c81..afefe514df0f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -205,6 +205,10 @@ static struct clocksource clocksource_tod = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a1ef46f61c81..f263b3abf46e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,7 @@ extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); +extern struct clocksource * __init __weak clocksource_default_clock(void); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..e91662e87cde 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -21,7 +21,6 @@ * * TODO WishList: * o Allow clocksource drivers to be unregistered - * o get rid of clocksource_jiffies extern */ #include @@ -107,12 +106,9 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL(timecounter_cyc2time); -/* XXX - Would like a better way for initializing curr_clocksource */ -extern struct clocksource clocksource_jiffies; - /*[Clocksource internal variables]--------- * curr_clocksource: - * currently selected clocksource. Initialized to clocksource_jiffies. + * currently selected clocksource. * next_clocksource: * pending next selected clocksource. * clocksource_list: @@ -123,9 +119,8 @@ extern struct clocksource clocksource_jiffies; * override_name: * Name of the user-specified clocksource. */ -static struct clocksource *curr_clocksource = &clocksource_jiffies; +static struct clocksource *curr_clocksource; static struct clocksource *next_clocksource; -static struct clocksource *clocksource_override; static LIST_HEAD(clocksource_list); static DEFINE_SPINLOCK(clocksource_lock); static char override_name[32]; @@ -320,6 +315,7 @@ void clocksource_touch_watchdog(void) clocksource_resume_watchdog(); } +#ifdef CONFIG_GENERIC_TIME /** * clocksource_get_next - Returns the selected clocksource * @@ -339,56 +335,65 @@ struct clocksource *clocksource_get_next(void) } /** - * select_clocksource - Selects the best registered clocksource. + * clocksource_select - Select the best clocksource available * * Private function. Must hold clocksource_lock when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. 
*/ -static struct clocksource *select_clocksource(void) +static void clocksource_select(void) { - struct clocksource *next; + struct clocksource *best, *cs; if (list_empty(&clocksource_list)) - return NULL; + return; + /* First clocksource on the list has the best rating. */ + best = list_first_entry(&clocksource_list, struct clocksource, list); + /* Check for the override clocksource. */ + list_for_each_entry(cs, &clocksource_list, list) { + if (strcmp(cs->name, override_name) != 0) + continue; + /* + * Check to make sure we don't switch to a non-highres + * capable clocksource if the tick code is in oneshot + * mode (highres or nohz) + */ + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && + tick_oneshot_mode_active()) { + /* Override clocksource cannot be used. */ + printk(KERN_WARNING "Override clocksource %s is not " + "HRT compatible. Cannot switch while in " + "HRT/NOHZ mode\n", cs->name); + override_name[0] = 0; + } else + /* Override clocksource can be used. */ + best = cs; + break; + } + if (curr_clocksource != best) + next_clocksource = best; +} - if (clocksource_override) - next = clocksource_override; - else - next = list_entry(clocksource_list.next, struct clocksource, - list); +#else /* CONFIG_GENERIC_TIME */ - if (next == curr_clocksource) - return NULL; +static void clocksource_select(void) { } - return next; -} +#endif /* * Enqueue the clocksource sorted by rating */ -static int clocksource_enqueue(struct clocksource *c) +static void clocksource_enqueue(struct clocksource *cs) { - struct list_head *tmp, *entry = &clocksource_list; - - list_for_each(tmp, &clocksource_list) { - struct clocksource *cs; + struct list_head *entry = &clocksource_list; + struct clocksource *tmp; - cs = list_entry(tmp, struct clocksource, list); - if (cs == c) - return -EBUSY; + list_for_each_entry(tmp, &clocksource_list, list) /* Keep track of the place, where to insert */ - if (cs->rating >= c->rating) - entry = tmp; - } - list_add(&c->list, entry); - - if (strlen(c->name) == strlen(override_name) && - !strcmp(c->name, override_name)) - clocksource_override = c; - - return 0; + if (tmp->rating >= cs->rating) + entry = &tmp->list; + list_add(&cs->list, entry); } /** @@ -397,19 +402,16 @@ static int clocksource_enqueue(struct clocksource *c) * * Returns -EBUSY if registration fails, zero otherwise. 
*/ -int clocksource_register(struct clocksource *c) +int clocksource_register(struct clocksource *cs) { unsigned long flags; - int ret; spin_lock_irqsave(&clocksource_lock, flags); - ret = clocksource_enqueue(c); - if (!ret) - next_clocksource = select_clocksource(); + clocksource_enqueue(cs); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); - if (!ret) - clocksource_check_watchdog(c); - return ret; + clocksource_check_watchdog(cs); + return 0; } EXPORT_SYMBOL(clocksource_register); @@ -425,7 +427,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -438,9 +440,7 @@ void clocksource_unregister(struct clocksource *cs) spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); - if (clocksource_override == cs) - clocksource_override = NULL; - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -478,9 +478,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { - struct clocksource *ovr = NULL; size_t ret = count; - int len; /* strings from sysfs write are not 0 terminated! */ if (count >= sizeof(override_name)) @@ -495,37 +493,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; - - len = strlen(override_name); - if (len) { - struct clocksource *cs; - - ovr = clocksource_override; - /* try to select it: */ - list_for_each_entry(cs, &clocksource_list, list) { - if (strlen(cs->name) == len && - !strcmp(cs->name, override_name)) - ovr = cs; - } - } - - /* - * Check to make sure we don't switch to a non-highres capable - * clocksource if the tick code is in oneshot mode (highres or nohz) - */ - if (tick_oneshot_mode_active() && ovr && - !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { - printk(KERN_WARNING "%s clocksource is not HRT compatible. 
" - "Cannot switch while in HRT/NOHZ mode\n", ovr->name); - ovr = NULL; - override_name[0] = 0; - } - - /* Reselect, when the override name has changed */ - if (ovr != clocksource_override) { - clocksource_override = ovr; - next_clocksource = select_clocksource(); - } + clocksource_select(); spin_unlock_irq(&clocksource_lock); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c3f6c30816e3..5404a8456909 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = { .read = jiffies_read, .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ - .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, .shift = JIFFIES_SHIFT, }; @@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void) } core_initcall(init_jiffies_clocksource); + +struct clocksource * __init __weak clocksource_default_clock(void) +{ + return &clocksource_jiffies; +} diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b5673016089f..325a9b63265a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -269,7 +269,7 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (clock == new) + if (!new || clock == new) return; clocksource_forward_now(); @@ -446,7 +446,7 @@ void __init timekeeping_init(void) ntp_init(); - clock = clocksource_get_next(); + clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); /* set mult_orig on enable */ -- cgit v1.2.3 From c55c87c892c1875deace0c8fc28787335277fdf2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:25 +0200 Subject: clocksource: Move watchdog downgrade to a work queue thread Move the downgrade of an unstable clocksource from the timer interrupt context into the process context of a work queue thread. This is needed to be able to do the clocksource switch with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.354926067@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 56 +++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f263b3abf46e..19ad43af62d0 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -213,6 +213,7 @@ extern struct clocksource *clock; /* current clocksource */ #define CLOCK_SOURCE_WATCHDOG 0x10 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 +#define CLOCK_SOURCE_UNSTABLE 0x40 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? 
((1ULL<<(bits))-1) : -1) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 56aaa749645d..f1508019bfb4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -143,10 +143,13 @@ fs_initcall(clocksource_done_booting); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; static struct timer_list watchdog_timer; +static struct work_struct watchdog_work; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; +static void clocksource_watchdog_work(struct work_struct *work); + /* * Interval: 0.5sec Threshold: 0.0625s */ @@ -158,15 +161,16 @@ static void clocksource_unstable(struct clocksource *cs, int64_t delta) printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); - clocksource_change_rating(cs, 0); - list_del(&cs->wd_list); + cs->flags |= CLOCK_SOURCE_UNSTABLE; + schedule_work(&watchdog_work); } static void clocksource_watchdog(unsigned long data) { - struct clocksource *cs, *tmp; + struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; + int next_cpu; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -176,7 +180,12 @@ static void clocksource_watchdog(unsigned long data) wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; - list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { + list_for_each_entry(cs, &watchdog_list, wd_list) { + + /* Clocksource already marked unstable? */ + if (cs->flags & CLOCK_SOURCE_UNSTABLE) + continue; + csnow = cs->read(cs); /* Clocksource initialized ? */ @@ -207,19 +216,15 @@ static void clocksource_watchdog(unsigned long data) } } - if (!list_empty(&watchdog_list)) { - /* - * Cycle through CPUs to check if the CPUs stay - * synchronized to each other. - */ - int next_cpu = cpumask_next(raw_smp_processor_id(), - cpu_online_mask); - - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); - } + /* + * Cycle through CPUs to check if the CPUs stay synchronized + * to each other. + */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); out: spin_unlock(&watchdog_lock); } @@ -228,6 +233,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; + INIT_WORK(&watchdog_work, clocksource_watchdog_work); init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; watchdog_last = watchdog->read(watchdog); @@ -313,6 +319,22 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } +static void clocksource_watchdog_work(struct work_struct *work) +{ + struct clocksource *cs, *tmp; + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) + if (cs->flags & CLOCK_SOURCE_UNSTABLE) { + list_del_init(&cs->wd_list); + clocksource_change_rating(cs, 0); + } + /* Check if the watchdog timer needs to be stopped. 
*/ + clocksource_stop_watchdog(); + spin_unlock(&watchdog_lock); +} + #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ static void clocksource_enqueue_watchdog(struct clocksource *cs) -- cgit v1.2.3 From 155ec60226ae0ae2aadaa57c951a58a359331030 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:26 +0200 Subject: timekeeping: Introduce struct timekeeper Add struct timekeeper to keep the internal values timekeeping.c needs in regard to the currently selected clock source. This moves the timekeeping intervals, xtime_nsec and the ntp error value from struct clocksource to struct timekeeper. The raw_time is removed from the clocksource as well. It gets treated, like xtime, as a global variable. Eventually xtime and raw_time should be moved to struct timekeeper. [ tglx: minor cleanup ] Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.613209842@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 1 - include/linux/clocksource.h | 54 +--------- kernel/time/clocksource.c | 6 +- kernel/time/timekeeping.c | 235 +++++++++++++++++++++++++++++--------------- 4 files changed, 164 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index afefe514df0f..e76c2e7a8b9a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -280,7 +280,6 @@ void __init time_init(void) now = get_clock(); tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); clocksource_tod.cycle_last = now; - clocksource_tod.raw_time = xtime; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 19ad43af62d0..e12e3095e2fb 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -155,8 +155,6 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @flags: flags describing special properties * @vread: vsyscall based read * @resume: resume function for the clocksource, if necessary - * @cycle_interval: Used internally by timekeeping core, please ignore. - * @xtime_interval: Used internally by timekeeping core, please ignore. */ struct clocksource { /* @@ -182,19 +180,12 @@ struct clocksource { #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - /* timekeeping specific data, ignore */ - cycle_t cycle_interval; - u64 xtime_interval; - u32 raw_interval; /* * Second part is written at each timer interrupt * Keep it in a different cache line to dirty no * more than one cache line. */ cycle_t cycle_last ____cacheline_aligned_in_smp; - u64 xtime_nsec; - s64 error; - struct timespec raw_time; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ @@ -203,8 +194,6 @@ struct clocksource { #endif }; -extern struct clocksource *clock; /* current clocksource */ - /* * Clock source flags bits:: */ @@ -270,50 +259,15 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) } /** - * cyc2ns - converts clocksource cycles to nanoseconds - * @cs: Pointer to clocksource - * @cycles: Cycles + * clocksource_cyc2ns - converts clocksource cycles to nanoseconds * - * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. + * Converts cycles to nanoseconds, using the given mult and shift.
* * XXX - This could use some mult_lxl_ll() asm optimization */ -static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) +static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) { - u64 ret = (u64)cycles; - ret = (ret * cs->mult) >> cs->shift; - return ret; -} - -/** - * clocksource_calculate_interval - Calculates a clocksource interval struct - * - * @c: Pointer to clocksource. - * @length_nsec: Desired interval length in nanoseconds. - * - * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment - * pair and interval request. - * - * Unless you're the timekeeping code, you should not be using this! - */ -static inline void clocksource_calculate_interval(struct clocksource *c, - unsigned long length_nsec) -{ - u64 tmp; - - /* Do the ns -> cycle conversion first, using original mult */ - tmp = length_nsec; - tmp <<= c->shift; - tmp += c->mult_orig/2; - do_div(tmp, c->mult_orig); - - c->cycle_interval = (cycle_t)tmp; - if (c->cycle_interval == 0) - c->cycle_interval = 1; - - /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ - c->xtime_interval = (u64)c->cycle_interval * c->mult; - c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; + return ((u64) cycles * mult) >> shift; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f1508019bfb4..f18c9a6bdcf4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -177,7 +177,8 @@ static void clocksource_watchdog(unsigned long data) goto out; wdnow = watchdog->read(watchdog); - wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); + wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, + watchdog->mult, watchdog->shift); watchdog_last = wdnow; list_for_each_entry(cs, &watchdog_list, wd_list) { @@ -196,7 +197,8 @@ static void clocksource_watchdog(unsigned long data) } /* Check the deviation from the watchdog clocksource. */ - cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); + cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + cs->mask, cs->mult, cs->shift); cs->wd_last = csnow; if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 325a9b63265a..7af45cbf6b13 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -19,6 +19,65 @@ #include #include +/* Structure holding internal timekeeping values. */ +struct timekeeper { + /* Current clocksource used for timekeeping. */ + struct clocksource *clock; + + /* Number of clock cycles in one NTP interval. */ + cycle_t cycle_interval; + /* Number of clock shifted nano seconds in one NTP interval. */ + u64 xtime_interval; + /* Raw nano seconds accumulated per NTP interval. */ + u32 raw_interval; + + /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ + u64 xtime_nsec; + /* Difference between accumulated time and NTP time in ntp + * shifted nano seconds. */ + s64 ntp_error; +}; + +struct timekeeper timekeeper; + +/** + * timekeeper_setup_internals - Set up internals to use clocksource clock. + * + * @clock: Pointer to clocksource. + * + * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment + * pair and interval request. + * + * Unless you're the timekeeping code, you should not be using this! 
+ */ +static void timekeeper_setup_internals(struct clocksource *clock) +{ + cycle_t interval; + u64 tmp; + + timekeeper.clock = clock; + clock->cycle_last = clock->read(clock); + + /* Do the ns -> cycle conversion first, using original mult */ + tmp = NTP_INTERVAL_LENGTH; + tmp <<= clock->shift; + tmp += clock->mult_orig/2; + do_div(tmp, clock->mult_orig); + if (tmp == 0) + tmp = 1; + + interval = (cycle_t) tmp; + timekeeper.cycle_interval = interval; + + /* Go back from cycles -> shifted ns */ + timekeeper.xtime_interval = (u64) interval * clock->mult; + timekeeper.raw_interval = + ((u64) interval * clock->mult_orig) >> clock->shift; + + timekeeper.xtime_nsec = 0; + + timekeeper.ntp_error = 0; +} /* * This read-write spinlock protects us from races in SMP while @@ -46,6 +105,11 @@ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static unsigned long total_sleep_time; /* seconds */ +/* + * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. + */ +struct timespec raw_time; + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -56,42 +120,42 @@ void update_xtime_cache(u64 nsec) timespec_add_ns(&xtime_cache, nsec); } -struct clocksource *clock; - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } #ifdef CONFIG_GENERIC_TIME /** - * clocksource_forward_now - update clock to the current time + * timekeeping_forward_now - update clock to the current time * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. 
*/ -static void clocksource_forward_now(void) +static void timekeeping_forward_now(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; s64 nsec; + clock = timekeeper.clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = cyc2ns(clock, cycle_delta); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; - clock->raw_time.tv_nsec += nsec; + nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + timespec_add_ns(&raw_time, nsec); } /** @@ -103,6 +167,7 @@ static void clocksource_forward_now(void) void getnstimeofday(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned long seq; s64 nsecs; @@ -114,13 +179,15 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -135,6 +202,7 @@ EXPORT_SYMBOL(getnstimeofday); ktime_t ktime_get(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned int seq; s64 secs, nsecs; @@ -146,13 +214,15 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += cyc2ns(clock, cycle_delta); + nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -174,6 +244,7 @@ EXPORT_SYMBOL_GPL(ktime_get); void ktime_get_ts(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; struct timespec tomono; unsigned int seq; s64 nsecs; @@ -186,13 +257,15 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); @@ -233,7 +306,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; @@ -243,10 +316,10 @@ int do_settimeofday(struct timespec *tv) update_xtime_cache(0); - clock->error = 0; + timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -269,10 +342,10 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (!new || clock == new) + if (!new || timekeeper.clock == new) return; - clocksource_forward_now(); + timekeeping_forward_now(); if (new->enable && 
!new->enable(new)) return; @@ -284,9 +357,9 @@ static void change_clocksource(void) */ new->mult_orig = new->mult; - new->raw_time = clock->raw_time; - old = clock; - clock = new; + old = timekeeper.clock; + timekeeper_setup_internals(new); + /* * Save mult_orig in mult so that the value can be restored * regardless if ->enable() updates the value of mult or not. @@ -295,22 +368,10 @@ static void change_clocksource(void) if (old->disable) old->disable(old); - clock->cycle_last = clock->read(clock); - clock->error = 0; - clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - tick_clock_notify(); - - /* - * We're holding xtime lock and waking up klogd would deadlock - * us on enqueue. So no printing! - printk(KERN_INFO "Time: %s clocksource has been installed.\n", - clock->name); - */ } #else /* GENERIC_TIME */ -static inline void clocksource_forward_now(void) { } +static inline void timekeeping_forward_now(void) { } static inline void change_clocksource(void) { } /** @@ -380,20 +441,23 @@ void getrawmonotonic(struct timespec *ts) unsigned long seq; s64 nsecs; cycle_t cycle_now, cycle_delta; + struct clocksource *clock; do { seq = read_seqbegin(&xtime_lock); /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + clock->shift); - *ts = clock->raw_time; + *ts = raw_time; } while (read_seqretry(&xtime_lock, seq)); @@ -413,7 +477,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqbegin(&xtime_lock); - ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqretry(&xtime_lock, seq)); @@ -439,6 +503,7 @@ unsigned long __attribute__((weak)) read_persistent_clock(void) */ void __init timekeeping_init(void) { + struct clocksource *clock; unsigned long flags; unsigned long sec = read_persistent_clock(); @@ -451,11 +516,13 @@ void __init timekeeping_init(void) clock->enable(clock); /* set mult_orig on enable */ clock->mult_orig = clock->mult; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clock->read(clock); + + timekeeper_setup_internals(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; + raw_time.tv_sec = 0; + raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); @@ -492,8 +559,8 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = clock->read(clock); - clock->error = 0; + timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); + timekeeper.ntp_error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -514,7 +581,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); timekeeping_suspended = 1; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -549,7 +616,7 @@ device_initcall(timekeeping_init_device); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. 
*/ -static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -565,7 +632,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -574,8 +641,9 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); - tick_error -= clock->xtime_interval >> 1; + tick_error = tick_length >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift + 1); + tick_error -= timekeeper.xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -600,18 +668,19 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void clocksource_adjust(s64 offset) +static void timekeeping_adjust(s64 offset) { - s64 error, interval = clock->cycle_interval; + s64 error, interval = timekeeper.cycle_interval; int adj; - error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); + error = timekeeper.ntp_error >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift - 1); if (error > interval) { error >>= 2; if (likely(error <= interval)) adj = 1; else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else if (error < -interval) { error >>= 2; if (likely(error >= -interval)) { @@ -619,15 +688,15 @@ static void clocksource_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else return; - clock->mult += adj; - clock->xtime_interval += interval; - clock->xtime_nsec -= offset; - clock->error -= (interval - offset) << - (NTP_SCALE_SHIFT - clock->shift); + timekeeper.clock->mult += adj; + timekeeper.xtime_interval += interval; + timekeeper.xtime_nsec -= offset; + timekeeper.ntp_error -= (interval - offset) << + (NTP_SCALE_SHIFT - timekeeper.clock->shift); } /** @@ -637,53 +706,59 @@ static void clocksource_adjust(s64 offset) */ void update_wall_time(void) { + struct clocksource *clock; cycle_t offset; + s64 nsecs; /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return; + clock = timekeeper.clock; #ifdef CONFIG_GENERIC_TIME offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else - offset = clock->cycle_interval; + offset = timekeeper.cycle_interval; #endif - clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; + timekeeper.xtime_nsec = (s64)xtime.tv_nsec << clock->shift; /* normally this loop will run just once, however in the * case of lost or late ticks, it will accumulate correctly. 
*/ - while (offset >= clock->cycle_interval) { + while (offset >= timekeeper.cycle_interval) { + u64 nsecps = (u64)NSEC_PER_SEC << clock->shift; + /* accumulate one interval */ - offset -= clock->cycle_interval; - clock->cycle_last += clock->cycle_interval; + offset -= timekeeper.cycle_interval; + clock->cycle_last += timekeeper.cycle_interval; - clock->xtime_nsec += clock->xtime_interval; - if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { - clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; + timekeeper.xtime_nsec += timekeeper.xtime_interval; + if (timekeeper.xtime_nsec >= nsecps) { + timekeeper.xtime_nsec -= nsecps; xtime.tv_sec++; second_overflow(); } - clock->raw_time.tv_nsec += clock->raw_interval; - if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { - clock->raw_time.tv_nsec -= NSEC_PER_SEC; - clock->raw_time.tv_sec++; + raw_time.tv_nsec += timekeeper.raw_interval; + if (raw_time.tv_nsec >= NSEC_PER_SEC) { + raw_time.tv_nsec -= NSEC_PER_SEC; + raw_time.tv_sec++; } /* accumulate error between NTP and clock interval */ - clock->error += tick_length; - clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); + timekeeper.ntp_error += tick_length; + timekeeper.ntp_error -= timekeeper.xtime_interval << + (NTP_SCALE_SHIFT - clock->shift); } /* correct the clock when NTP error is too big */ - clocksource_adjust(offset); + timekeeping_adjust(offset); /* * Since in the loop above, we accumulate any amount of time * in xtime_nsec over a second into xtime.tv_sec, its possible for * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in clocksource_adjust(), + * slightly speeding the clocksource up in timekeeping_adjust(), * its possible the required corrective factor to xtime_nsec could * cause it to underflow. * @@ -695,24 +770,26 @@ void update_wall_time(void) * We'll correct this error next time through this function, when * xtime_nsec is not as small. */ - if (unlikely((s64)clock->xtime_nsec < 0)) { - s64 neg = -(s64)clock->xtime_nsec; - clock->xtime_nsec = 0; - clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); + if (unlikely((s64)timekeeper.xtime_nsec < 0)) { + s64 neg = -(s64)timekeeper.xtime_nsec; + timekeeper.xtime_nsec = 0; + timekeeper.ntp_error += neg << (NTP_SCALE_SHIFT - clock->shift); } /* store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ - xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; - clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; - clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); + xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> clock->shift) + 1; + timekeeper.xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; + timekeeper.ntp_error += timekeeper.xtime_nsec << + (NTP_SCALE_SHIFT - clock->shift); - update_xtime_cache(cyc2ns(clock, offset)); + nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ change_clocksource(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } /** -- cgit v1.2.3 From 0a54419836254a27baecd9037103171bcbabaf67 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:28 +0200 Subject: timekeeping: Move NTP adjusted clock multiplier to struct timekeeper The clocksource structure has two multipliers, the unmodified multiplier clock->mult_orig and the NTP corrected multiplier clock->mult. 
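Both multipliers feed the same conversion helper introduced earlier in this series:

    /* ns = (cycles * mult) >> shift */
    static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
    {
            return ((u64) cycles * mult) >> shift;
    }

NTP steers the clock by nudging the mult value up and down (see timekeeping_adjust() below), so the adjusted copy is timekeeping-internal state rather than a property of the hardware.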
The NTP multiplier is misplaced in the struct clocksource; this is private information of the timekeeping code. Add the mult field to the struct timekeeper to contain the NTP corrected value, keep the unmodified multiplier in clock->mult and remove clock->mult_orig. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.149047645@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/arm/plat-omap/common.c | 7 ++---- include/linux/clocksource.h | 4 +--- kernel/time/timekeeping.c | 53 ++++++++++++++++++++------------------------- 3 files changed, 27 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index ebcf006406f9..95587b6c0259 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -253,11 +253,8 @@ static struct clocksource clocksource_32k = { */ unsigned long long sched_clock(void) { - unsigned long long ret; - - ret = (unsigned long long)clocksource_32k.read(&clocksource_32k); - ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; - return ret; + return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), + clocksource_32k.mult, clocksource_32k.shift); } static int __init omap_init_clocksource_32k(void) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e12e3095e2fb..e34015effeb6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -149,8 +149,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @disable: optional function to disable the clocksource * @mask: bitmask for two's complement * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier (adjusted by NTP) - * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) + * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read @@ -168,7 +167,6 @@ struct clocksource { void (*disable)(struct clocksource *cs); cycle_t mask; u32 mult; - u32 mult_orig; u32 shift; unsigned long flags; cycle_t (*vread)(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dfdab1cefe1e..f4056f6c2632 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -41,6 +41,8 @@ struct timekeeper { /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ int ntp_error_shift; + /* NTP adjusted clock multiplier */ + u32 mult; }; struct timekeeper timekeeper; @@ -66,8 +68,8 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; - tmp += clock->mult_orig/2; - do_div(tmp, clock->mult_orig); + tmp += clock->mult/2; + do_div(tmp, clock->mult); if (tmp == 0) tmp = 1; @@ -77,13 +79,20 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; timekeeper.raw_interval = - ((u64) interval * clock->mult_orig) >> clock->shift; + ((u64) interval * clock->mult) >> clock->shift; timekeeper.xtime_nsec = 0; timekeeper.shift = clock->shift; timekeeper.ntp_error = 0; timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + + /* + * The timekeeper keeps its own mult values for the currently + * active clocksource. These value will be adjusted via NTP + * to counteract clock drifting.
+ */ + timekeeper.mult = clock->mult; } /* @@ -154,14 +163,15 @@ static void timekeeping_forward_now(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); timespec_add_ns(&raw_time, nsec); } @@ -193,8 +203,8 @@ void getnstimeofday(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -228,8 +238,8 @@ ktime_t ktime_get(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs += clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -271,8 +281,8 @@ void ktime_get_ts(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); @@ -356,22 +366,10 @@ static void change_clocksource(void) if (new->enable && !new->enable(new)) return; - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - new->mult_orig = new->mult; old = timekeeper.clock; timekeeper_setup_internals(new); - /* - * Save mult_orig in mult so that the value can be restored - * regardless if ->enable() updates the value of mult or not. 
- */ - old->mult = old->mult_orig; if (old->disable) old->disable(old); @@ -461,7 +459,7 @@ void getrawmonotonic(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); *ts = raw_time; @@ -521,9 +519,6 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - /* set mult_orig on enable */ - clock->mult_orig = clock->mult; - timekeeper_setup_internals(clock); xtime.tv_sec = sec; @@ -697,7 +692,7 @@ static void timekeeping_adjust(s64 offset) } else return; - timekeeper.clock->mult += adj; + timekeeper.mult += adj; timekeeper.xtime_interval += interval; timekeeper.xtime_nsec -= offset; timekeeper.ntp_error -= (interval - offset) << @@ -789,7 +784,7 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ -- cgit v1.2.3 From 75c5158f70c065b9704b924503d96e8297838f79 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:30 +0200 Subject: timekeeping: Update clocksource with stop_machine update_wall_time calls change_clocksource HZ times per second to check if a new clock source is available. In close to 100% of all calls there is no new clock. Replace the tick based check by an update done with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.711836357@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 112 +++++++++++++++++--------------------------- kernel/time/timekeeping.c | 41 ++++++++++------ 3 files changed, 72 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e34015effeb6..9ea40ff26f0e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,4 +291,6 @@ static inline void update_vsyscall_tz(void) } #endif +extern void timekeeping_notify(struct clocksource *clock); + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f18c9a6bdcf4..a1657b5fdeb9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -109,35 +109,17 @@ EXPORT_SYMBOL(timecounter_cyc2time); /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. - * next_clocksource: - * pending next selected clocksource. * clocksource_list: * linked list with the registered clocksources - * clocksource_lock: - * protects manipulations to curr_clocksource and next_clocksource - * and the clocksource_list + * clocksource_mutex: + * protects manipulations to curr_clocksource and the clocksource_list * override_name: * Name of the user-specified clocksource. 
*/ static struct clocksource *curr_clocksource; -static struct clocksource *next_clocksource; static LIST_HEAD(clocksource_list); -static DEFINE_SPINLOCK(clocksource_lock); +static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; -static int finished_booting; - -/* clocksource_done_booting - Called near the end of core bootup - * - * Hack to avoid lots of clocksource churn at boot time. - * We use fs_initcall because we want this to start before - * device_initcall but after subsys_initcall. - */ -static int __init clocksource_done_booting(void) -{ - finished_booting = 1; - return 0; -} -fs_initcall(clocksource_done_booting); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static LIST_HEAD(watchdog_list); @@ -356,18 +338,16 @@ static inline void clocksource_resume_watchdog(void) { } void clocksource_resume(void) { struct clocksource *cs; - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); - list_for_each_entry(cs, &clocksource_list, list) { + list_for_each_entry(cs, &clocksource_list, list) if (cs->resume) cs->resume(); - } clocksource_resume_watchdog(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } /** @@ -383,28 +363,13 @@ void clocksource_touch_watchdog(void) } #ifdef CONFIG_GENERIC_TIME -/** - * clocksource_get_next - Returns the selected clocksource - * - */ -struct clocksource *clocksource_get_next(void) -{ - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); - if (next_clocksource && finished_booting) { - curr_clocksource = next_clocksource; - next_clocksource = NULL; - } - spin_unlock_irqrestore(&clocksource_lock, flags); - - return curr_clocksource; -} +static int finished_booting; /** * clocksource_select - Select the best clocksource available * - * Private function. Must hold clocksource_lock when called. + * Private function. Must hold clocksource_mutex when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. @@ -413,7 +378,7 @@ static void clocksource_select(void) { struct clocksource *best, *cs; - if (list_empty(&clocksource_list)) + if (!finished_booting || list_empty(&clocksource_list)) return; /* First clocksource on the list has the best rating. */ best = list_first_entry(&clocksource_list, struct clocksource, list); @@ -438,13 +403,31 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) - next_clocksource = best; + if (curr_clocksource != best) { + printk(KERN_INFO "Switching to clocksource %s\n", best->name); + curr_clocksource = best; + timekeeping_notify(curr_clocksource); + } } +/* + * clocksource_done_booting - Called near the end of core bootup + * + * Hack to avoid lots of clocksource churn at boot time. + * We use fs_initcall because we want this to start before + * device_initcall but after subsys_initcall. 
+ */ +static int __init clocksource_done_booting(void) +{ + finished_booting = 1; + clocksource_select(); + return 0; +} +fs_initcall(clocksource_done_booting); + #else /* CONFIG_GENERIC_TIME */ -static void clocksource_select(void) { } +static inline void clocksource_select(void) { } #endif @@ -471,13 +454,11 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); return 0; } EXPORT_SYMBOL(clocksource_register); @@ -487,14 +468,12 @@ EXPORT_SYMBOL(clocksource_register); */ void clocksource_change_rating(struct clocksource *cs, int rating) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); @@ -503,13 +482,11 @@ EXPORT_SYMBOL(clocksource_change_rating); */ void clocksource_unregister(struct clocksource *cs) { - unsigned long flags; - + mutex_lock(&clocksource_mutex); clocksource_dequeue_watchdog(cs); - spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_unregister); @@ -527,9 +504,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, { ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return count; } @@ -557,14 +534,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (buf[count-1] == '\n') count--; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; clocksource_select(); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return ret; } @@ -584,7 +561,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, struct clocksource *src; ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); list_for_each_entry(src, &clocksource_list, list) { /* * Don't show non-HRES clocksource if the tick code is @@ -596,7 +573,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); @@ -651,11 +628,10 @@ device_initcall(init_clocksource_sysfs); */ static int __init boot_override_clocksource(char* str) { - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); if (str) strlcpy(override_name, str, sizeof(override_name)); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); return 1; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 27ae01b596b7..41579e7fcf9d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -18,6 +18,7 @@ #include #include #include 
+#include <linux/stop_machine.h> /* Structure holding internal timekeeping values. */ struct timekeeper { @@ -179,6 +180,7 @@ void timekeeping_leap_insert(int leapsecond) } #ifdef CONFIG_GENERIC_TIME + /** * timekeeping_forward_now - update clock to the current time * @@ -351,31 +353,40 @@ EXPORT_SYMBOL(do_settimeofday); * * Accumulates current time interval and initializes new clocksource */ -static void change_clocksource(void) +static int change_clocksource(void *data) { struct clocksource *new, *old; - new = clocksource_get_next(); - - if (!new || timekeeper.clock == new) - return; + new = (struct clocksource *) data; timekeeping_forward_now(); + if (!new->enable || new->enable(new) == 0) { + old = timekeeper.clock; + timekeeper_setup_internals(new); + if (old->disable) + old->disable(old); + } + return 0; +} - if (new->enable && !new->enable(new)) +/** + * timekeeping_notify - Install a new clock source + * @clock: pointer to the clock source + * + * This function is called from clocksource.c after a new, better clock + * source has been registered. The caller holds the clocksource_mutex. + */ +void timekeeping_notify(struct clocksource *clock) +{ + if (timekeeper.clock == clock) return; - - old = timekeeper.clock; - timekeeper_setup_internals(new); - - if (old->disable) - old->disable(old); - + stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); } + #else /* GENERIC_TIME */ + static inline void timekeeping_forward_now(void) { } -static inline void change_clocksource(void) { } /** * ktime_get - get the monotonic time in ktime_t format @@ -416,6 +427,7 @@ void ktime_get_ts(struct timespec *ts) ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); + #endif /* !GENERIC_TIME */ /** @@ -773,7 +785,6 @@ void update_wall_time(void) update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ - change_clocksource(); update_vsyscall(&xtime, timekeeper.clock); } -- cgit v1.2.3 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) has a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system, change the read_persistent_clock function to return a struct timespec.
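For an architecture whose persistent clock really does have sub-second resolution (the s390 case that motivates this change), the conversion amounts to splitting a nanosecond count into the two timespec fields. A minimal sketch, assuming a hypothetical arch_persistent_clock_ns() helper that returns nanoseconds since the epoch (the helper is not part of this series):

#include <linux/time.h>
#include <linux/math64.h>

/* hypothetical: raw nanoseconds since the epoch from battery-backed hardware */
extern u64 arch_persistent_clock_ns(void);

void read_persistent_clock(struct timespec *ts)
{
	u32 rem;

	/* split the 64-bit nanosecond count into seconds and leftover ns */
	ts->tv_sec = div_u64_rem(arch_persistent_clock_ns(), NSEC_PER_SEC, &rem);
	ts->tv_nsec = rem;
}

Architectures whose clock only has second granularity simply set tv_nsec to 0, which is what the bulk of the conversions below do.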
Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/m68knommu/kernel/time.c | 5 ++-- arch/mips/dec/time.c | 5 ++-- arch/mips/lasat/ds1603.c | 5 ++-- arch/mips/lasat/sysctl.c | 8 ++++-- arch/mips/lemote/lm2e/setup.c | 5 ++-- arch/mips/mti-malta/malta-time.c | 5 ++-- arch/mips/pmc-sierra/yosemite/setup.c | 5 ++-- arch/mips/sibyte/swarm/setup.c | 15 +++++++--- arch/mips/sni/time.c | 5 ++-- arch/powerpc/kernel/time.c | 7 +++-- arch/s390/kernel/time.c | 22 +++------------ arch/sh/kernel/time.c | 6 ++-- arch/x86/kernel/rtc.c | 5 ++-- arch/xtensa/kernel/time.c | 5 ++-- include/linux/time.h | 2 +- kernel/time/timekeeping.c | 52 +++++++++++++++++++---------------- 16 files changed, 83 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f72211..68432248515c 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void) return mktime(year, mon, day, hour, min, sec);; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return read_rtc_mmss(); + ts->tv_sec = read_rtc_mmss(); + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 463136e6685a..02f505f23c32 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -18,7 +18,7 @@ #include #include -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, mon, day, hour, min, sec, real_year; unsigned long flags; @@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void) year += real_year - 72 + 2000; - return mktime(year, mon, day, hour, min, sec); + ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; } /* diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c index 52cb1436a12a..c6fd96ff118d 100644 --- a/arch/mips/lasat/ds1603.c +++ b/arch/mips/lasat/ds1603.c @@ -135,7 +135,7 @@ static void rtc_end_op(void) lasat_ndelay(1000); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long word; unsigned long flags; @@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void) rtc_end_op(); spin_unlock_irqrestore(&rtc_lock, flags); - return word; + ts->tv_sec = word; + ts->tv_nsec = 0; } int rtc_mips_set_mmss(unsigned long time) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 8f88886feb12..3f04d4c406b7 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -92,10 +92,12 @@ static int rtctmp; int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, loff_t *ppos) { + struct timespec ts; int r; if (!write) { - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) rtctmp = 0; @@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { + struct timespec ts; int r; - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; if (rtctmp < 0) rtctmp = 0; r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index ebd6ceaef2fd..24b355df6127 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ 
b/arch/mips/lemote/lm2e/setup.c @@ -54,9 +54,10 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_clock_freq / 2; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } void (*__wbflush)(void); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 0b97d47691fc..3c6f190aa61c 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void) return count; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } static void __init plat_perf_setup(void) diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 2d3c0dca275d..3498ac9c35af 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -70,7 +70,7 @@ void __init bus_error_init(void) } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, month, day, hour, min, sec; unsigned long flags; @@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void) m48t37_base->control = 0x00; spin_unlock_irqrestore(&rtc_lock, flags); - return mktime(year, month, day, hour, min, sec); + ts->tv_sec = mktime(year, month, day, hour, min, sec); + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long tim) diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 672e45d495a9..623ffc933c4c 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -87,19 +87,26 @@ enum swarm_rtc_type { enum swarm_rtc_type swarm_rtc_type; -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { + unsigned long sec; + switch (swarm_rtc_type) { case RTC_XICOR: - return xicor_get_time(); + sec = xicor_get_time(); + break; case RTC_M4LT81: - return m41t81_get_time(); + sec = m41t81_get_time(); + break; case RTC_NONE: default: - return mktime(2000, 1, 1, 0, 0, 0); + sec = mktime(2000, 1, 1, 0, 0, 0); + break; } + ts->tv_sec = sec; + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long sec) diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 0d9ec1a5c24a..62df6a598e0a 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -182,7 +182,8 @@ void __init plat_time_init(void) setup_pit_timer(); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return -1; + ts->tv_sec = -1; + ts->tv_nsec = 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..ad63f30fe3da 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -769,7 +769,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -787,8 +787,9 @@ unsigned long read_persistent_clock(void) if (!ppc_md.get_rtc_time) return 0; ppc_md.get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_nsec = 0; } /* clocksource code */ diff --git a/arch/s390/kernel/time.c 
b/arch/s390/kernel/time.c index e76c2e7a8b9a..a94ec48587b4 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -182,12 +182,9 @@ static void timing_alert_interrupt(__u16 code) static void etr_reset(void); static void stp_reset(void); -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec ts; - - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); - return ts.tv_sec; + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -248,7 +245,6 @@ void __init time_init(void) { struct timespec ts; unsigned long flags; - cycle_t now; /* Reset time synchronization interfaces. */ etr_reset(); stp_reset(); @@ -266,20 +262,10 @@ void __init time_init(void) panic("Could not register TOD clock source"); /* - * The TOD clock is an accurate clock. The xtime should be - * initialized in a way that the difference between TOD and - * xtime is reasonably small. Too bad that timekeeping_init - * sets xtime.tv_nsec to zero. In addition the clock source - * change from the jiffies clock source to the TOD clock - * source add another error of up to 1/HZ second. The same - * function sets wall_to_monotonic to a value that is too - * small for /proc/uptime to be accurate. - * Reset xtime and wall_to_monotonic to sane values. + * Reset wall_to_monotonic to the initial timestamp created + * in head.S to get a precise value in /proc/uptime. */ write_seqlock_irqsave(&xtime_lock, flags); - now = get_clock(); - tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); - clocksource_tod.cycle_last = now; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 9b352a1e3fb4..3f4706aa975e 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; #ifdef CONFIG_GENERIC_CMOS_UPDATE -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec tv; - rtc_sh_get_time(&tv); - return tv.tv_sec; + rtc_sh_get_time(ts); } int update_persistent_clock(struct timespec now) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e72..bf67dcb4a44c 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 8848120d291b..19085ff0484a 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = { void __init time_init(void) { - xtime.tv_nsec = 0; - xtime.tv_sec = read_persistent_clock(); - + /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init. 
*/ + read_persistent_clock(&xtime); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); diff --git a/include/linux/time.h b/include/linux/time.h index e7c844558884..53a3216f0d1b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -101,7 +101,7 @@ extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; -extern unsigned long read_persistent_clock(void); +extern void read_persistent_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 41579e7fcf9d..f1a21ce491e6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -154,7 +154,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); */ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); -static unsigned long total_sleep_time; /* seconds */ +static struct timespec total_sleep_time; /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. @@ -487,17 +487,18 @@ int timekeeping_valid_for_hres(void) } /** - * read_persistent_clock - Return time in seconds from the persistent clock. + * read_persistent_clock - Return time from the persistent clock. * * Weak dummy function for arches that do not yet support it. - * Returns seconds from epoch using the battery backed persistent clock. - * Returns zero if unsupported. + * Reads the time from the battery backed persistent clock. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ -unsigned long __attribute__((weak)) read_persistent_clock(void) +void __attribute__((weak)) read_persistent_clock(struct timespec *ts) { - return 0; + ts->tv_sec = 0; + ts->tv_nsec = 0; } /* @@ -507,7 +508,9 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - unsigned long sec = read_persistent_clock(); + struct timespec now; + + read_persistent_clock(&now); write_seqlock_irqsave(&xtime_lock, flags); @@ -518,19 +521,20 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; + xtime.tv_sec = now.tv_sec; + xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); - total_sleep_time = 0; + total_sleep_time.tv_sec = 0; + total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ -static unsigned long timekeeping_suspend_time; +static struct timespec timekeeping_suspend_time; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. 
@@ -543,18 +547,19 @@ static unsigned long timekeeping_suspend_time; static int timekeeping_resume(struct sys_device *dev) { unsigned long flags; - unsigned long now = read_persistent_clock(); + struct timespec ts; + + read_persistent_clock(&ts); clocksource_resume(); write_seqlock_irqsave(&xtime_lock, flags); - if (now && (now > timekeeping_suspend_time)) { - unsigned long sleep_length = now - timekeeping_suspend_time; - - xtime.tv_sec += sleep_length; - wall_to_monotonic.tv_sec -= sleep_length; - total_sleep_time += sleep_length; + if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { + ts = timespec_sub(ts, timekeeping_suspend_time); + xtime = timespec_add_safe(xtime, ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); + total_sleep_time = timespec_add_safe(total_sleep_time, ts); } update_xtime_cache(0); /* re-base the last cycle value */ @@ -577,7 +582,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; - timekeeping_suspend_time = read_persistent_clock(); + read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&xtime_lock, flags); timekeeping_forward_now(); @@ -801,9 +806,10 @@ void update_wall_time(void) */ void getboottime(struct timespec *ts) { - set_normalized_timespec(ts, - - (wall_to_monotonic.tv_sec + total_sleep_time), - - wall_to_monotonic.tv_nsec); + struct timespec boottime; + + boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time); + set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } /** @@ -812,7 +818,7 @@ void getboottime(struct timespec *ts) */ void monotonic_to_bootbased(struct timespec *ts) { - ts->tv_sec += total_sleep_time; + *ts = timespec_add_safe(*ts, total_sleep_time); } unsigned long get_seconds(void) -- cgit v1.2.3 From 23970e389e9cee43c4b41023935e1417271708b2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:32 +0200 Subject: timekeeping: Introduce read_boot_clock Add the new function read_boot_clock to get the exact time the system has been started. For architectures without support for exact boot time a new weak function is added that returns 0. Use the exact boot time to initialize wall_to_monotonic, or xtime if the read_boot_clock returned 0. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.296703241@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 17 +++++------------ include/linux/time.h | 1 + kernel/time/timekeeping.c | 24 ++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a94ec48587b4..6bff1a1d9060 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -187,6 +187,11 @@ void read_persistent_clock(struct timespec *ts) tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } +void read_boot_clock(struct timespec *ts) +{ + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); @@ -243,9 +248,6 @@ void update_vsyscall_tz(void) */ void __init time_init(void) { - struct timespec ts; - unsigned long flags; - /* Reset time synchronization interfaces. 
*/ etr_reset(); stp_reset(); @@ -261,15 +263,6 @@ void __init time_init(void) if (clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); - /* - * Reset wall_to_monotonic to the initial timestamp created - * in head.S to get a precise value in /proc/uptime. - */ - write_seqlock_irqsave(&xtime_lock, flags); - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); - set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); - /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); diff --git a/include/linux/time.h b/include/linux/time.h index 53a3216f0d1b..f505988398e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -102,6 +102,7 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; extern void read_persistent_clock(struct timespec *ts); +extern void read_boot_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f1a21ce491e6..15e06defca55 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -501,6 +501,21 @@ void __attribute__((weak)) read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +/** + * read_boot_clock - Return time of the system start. + * + * Weak dummy function for arches that do not yet support it. + * Function to read the exact time the system has been started. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. + * + * XXX - Do be sure to remove it once all arches implement it. + */ +void __attribute__((weak)) read_boot_clock(struct timespec *ts) +{ + ts->tv_sec = 0; + ts->tv_nsec = 0; +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -508,9 +523,10 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - struct timespec now; + struct timespec now, boot; read_persistent_clock(&now); + read_boot_clock(&boot); write_seqlock_irqsave(&xtime_lock, flags); @@ -525,8 +541,12 @@ void __init timekeeping_init(void) xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; + if (boot.tv_sec == 0 && boot.tv_nsec == 0) { + boot.tv_sec = xtime.tv_sec; + boot.tv_nsec = xtime.tv_nsec; + } set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + -boot.tv_sec, -boot.tv_nsec); update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; -- cgit v1.2.3 From da15cfdae03351c689736f8d142618592e3cebc3 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 19 Aug 2009 19:13:34 -0700 Subject: time: Introduce CLOCK_REALTIME_COARSE After talking with some application writers who want very fast, but not fine-grained timestamps, I decided to try to implement new clock_ids for clock_gettime(): CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE, which return the time at the last tick. This is very fast as we don't have to access any hardware (which can be very painful if you're using something like the acpi_pm clocksource), and we can even use the vdso clock_gettime() method to avoid the syscall. The only trade-off is that you only get low-res, tick-grained time resolution. This isn't a new idea; I know Ingo has a patch in the -rt tree that made the vsyscall gettimeofday() return coarse-grained time when the vsyscall64 sysctl was set to 2. However, this affects all applications on a system. 
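From userspace the new ids slot straight into the existing clock_gettime() interface; a minimal sketch of a caller (plain POSIX C, assuming a libc that already exposes the new CLOCK_*_COARSE constants):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts, res;

	/* tick-granularity timestamp, no clocksource hardware access */
	if (clock_gettime(CLOCK_REALTIME_COARSE, &ts) == 0)
		printf("coarse time: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);

	/* the reported resolution is the tick length, not the clocksource's */
	if (clock_getres(CLOCK_REALTIME_COARSE, &res) == 0)
		printf("resolution : %ld.%09ld\n", (long)res.tv_sec, res.tv_nsec);

	return 0;
}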
With this method, applications can choose the proper speed/granularity trade-off for themselves. Signed-off-by: John Stultz Cc: Andi Kleen Cc: nikolag@ca.ibm.com Cc: Darren Hart Cc: arjan@infradead.org Cc: jonathan@jonmasters.org LKML-Reference: <1250734414.6897.5.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vgtod.h | 1 + arch/x86/kernel/vsyscall_64.c | 1 + arch/x86/vdso/vclock_gettime.c | 39 ++++++++++++++++++++++++++++++++++++--- include/linux/time.h | 4 ++++ kernel/posix-timers.c | 35 +++++++++++++++++++++++++++++++++++ kernel/time/timekeeping.c | 21 +++++++++++++++++++++ 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69e5d2a..3d61e204826f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -21,6 +21,7 @@ struct vsyscall_gtod_data { u32 shift; } clock; struct timespec wall_to_monotonic; + struct timespec wall_time_coarse; }; extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aad..cf53a78e2dcf 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78b46aa..ee55754cc3c5 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) return 0; } +notrace static noinline int do_realtime_coarse(struct timespec *ts) +{ + unsigned long seq; + do { + seq = read_seqbegin(&gtod->lock); + ts->tv_sec = gtod->wall_time_coarse.tv_sec; + ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + return 0; +} + +notrace static noinline int do_monotonic_coarse(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(&gtod->lock); + secs = gtod->wall_time_coarse.tv_sec; + ns = gtod->wall_time_coarse.tv_nsec; + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + if (likely(gtod->sysctl_enabled)) switch (clock) { case CLOCK_REALTIME: - return do_realtime(ts); + if (likely(gtod->clock.vread)) + return do_realtime(ts); + break; case CLOCK_MONOTONIC: - return do_monotonic(ts); + if (likely(gtod->clock.vread)) + return do_monotonic(ts); + break; + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(ts); } return vdso_fallback_gettime(clock, ts); } diff --git a/include/linux/time.h b/include/linux/time.h index f505988398e6..256232f7e5e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -110,6 +110,8 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); 
+struct timespec __current_kernel_time(void); /* does not hold xtime_lock */ +struct timespec get_monotonic_coarse(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) @@ -243,6 +245,8 @@ struct itimerval { #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 #define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 /* * The IDs of various hardware clocks: diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d089d052c4a9..495440779ce3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) return 0; } + +static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +{ + *tp = current_kernel_time(); + return 0; +} + +static int posix_get_monotonic_coarse(clockid_t which_clock, + struct timespec *tp) +{ + *tp = get_monotonic_coarse(); + return 0; +} + +int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +{ + *tp = ktime_to_timespec(KTIME_LOW_RES); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ @@ -262,10 +281,26 @@ static __init int init_posix_timers(void) .timer_create = no_timer_create, .nsleep = no_nsleep, }; + struct k_clock clock_realtime_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_realtime_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; + struct k_clock clock_monotonic_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_monotonic_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); + register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); + register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 15e06defca55..03cbeb34d141 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -847,6 +847,10 @@ unsigned long get_seconds(void) } EXPORT_SYMBOL(get_seconds); +struct timespec __current_kernel_time(void) +{ + return xtime_cache; +} struct timespec current_kernel_time(void) { @@ -862,3 +866,20 @@ struct timespec current_kernel_time(void) return now; } EXPORT_SYMBOL(current_kernel_time); + +struct timespec get_monotonic_coarse(void) +{ + struct timespec now, mono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + + now = xtime_cache; + mono = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, + now.tv_nsec + mono.tv_nsec); + return now; +} -- cgit v1.2.3 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug deadlock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource, the clocksource_mutex is acquired and, if necessary, timekeeping_notify is called to install the clocksource as the timekeeper clock. 
timekeeping_notify uses stop_machine, which needs to take the cpu_add_remove_lock mutex. Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and, if the tsc is no good, clocksource_change_rating is called, which needs the clocksource_mutex, and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- arch/x86/kernel/tsc.c | 8 +++++--- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 968425422c46..fc3672a303d6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -767,12 +767,14 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); + printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; clocksource_tsc.rating = 0; + } } } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9ea40ff26f0e..83d2fbd81b93 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -277,6 +277,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); extern struct clocksource * __init __weak clocksource_default_clock(void); +extern void clocksource_mark_unstable(struct clocksource *cs); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0c86ad6e9fb..a0af4ffcb6e5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -149,15 +149,42 @@ static void clocksource_watchdog_work(struct work_struct *work) kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) +static void __clocksource_unstable(struct clocksource *cs) { - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; schedule_work(&watchdog_work); } +static void clocksource_unstable(struct clocksource *cs, int64_t delta) +{ + printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", + cs->name, delta); + __clocksource_unstable(cs); +} + +/** + * clocksource_mark_unstable - mark clocksource unstable via watchdog + * @cs: clocksource to be marked unstable + * + * This function is called instead of clocksource_change_rating from + * cpu hotplug code to avoid a deadlock between the clocksource mutex + * and the cpu hotplug mutex. It defers the update of the clocksource + * to the watchdog thread. 
+ */ +void clocksource_mark_unstable(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { + if (list_empty(&cs->wd_list)) + list_add(&cs->wd_list, &watchdog_list); + __clocksource_unstable(cs); + } + spin_unlock_irqrestore(&watchdog_lock, flags); +} + static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; -- cgit v1.2.3 From e500011ffa191d662ac64d4ada6a5187b3180e16 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Aug 2009 13:19:12 -0700 Subject: timers: Drop a function prototype Drop the prototype for the non-existent next_timer_interrupt() function. Signed-off-by: Randy Dunlap Cc: akpm LKML-Reference: <4A9ADEC0.70306@oracle.com> Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index be62ec2ebea5..a2d1eb6cb3f0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -173,11 +173,6 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base: - */ -extern unsigned long next_timer_interrupt(void); /* * Return when the next timer-wheel timeout occurs (in absolute jiffies), * locks the timer base and does the comparison against the given -- cgit v1.2.3 From 12e09337fe238981cb0c87543306e23775d1a143 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2009 23:37:40 +0200 Subject: time: Prevent 32-bit overflow with set_normalized_timespec() The set_normalized_timespec() nsec argument is of type long. The recent timekeeping changes of ktime_get_ts() feed ts->tv_nsec + tomono.tv_nsec + nsecs to set_normalized_timespec(). On 32-bit machines that sum can be larger than (1 << 31) and therefore result in a negative value, which screws up the result completely. Make the nsec argument of set_normalized_timespec() s64 to fix the problem at hand. This also prevents similar problems for future users of set_normalized_timespec(). Signed-off-by: Thomas Gleixner Tested-by: Carsten Emde LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- include/linux/time.h | 2 +- kernel/time.c | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index 256232f7e5e6..56787c093345 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -75,7 +75,7 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec); -extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); +extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec); extern struct timespec timespec_add_safe(const struct timespec lhs, const struct timespec rhs); diff --git a/kernel/time.c b/kernel/time.c index 29511943871a..2e2e469a7fec 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -370,13 +370,20 @@ EXPORT_SYMBOL(mktime); * 0 <= tv_nsec < NSEC_PER_SEC * For negative values only the tv_sec field is negative ! 
*/ -void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) +void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec) { while (nsec >= NSEC_PER_SEC) { + /* + * The following asm() prevents the compiler from + * optimising this loop into a modulo operation. See + * also __iter_div_u64_rem() in include/linux/time.h + */ + asm("" : "+rm"(nsec)); nsec -= NSEC_PER_SEC; ++sec; } while (nsec < 0) { + asm("" : "+rm"(nsec)); nsec += NSEC_PER_SEC; --sec; } -- cgit v1.2.3
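To make the 32-bit failure mode concrete: two in-range tv_nsec values plus an accumulated nanosecond delta can exceed (1 << 31) - 1, so the intermediate sum no longer fits in a 32-bit long. A standalone sketch of the arithmetic (ordinary userspace C, with int32_t standing in for a 32-bit kernel long):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* two legal tv_nsec values plus an accumulated nsec delta */
	int64_t sum = 999999999LL + 999999999LL + 500000000LL; /* 2499999998 */

	/* what a 32-bit long would hold: wraps past 2^31 - 1, goes negative */
	int32_t wrapped = (int32_t)sum;

	printf("s64 sum: %lld ns, 32-bit long: %d ns\n", (long long)sum, wrapped);
	return 0;
}

With a negative nsec the first while loop in set_normalized_timespec() never runs and the second one "repairs" a value that was never meant to be negative, corrupting the result; widening the argument to s64 keeps the normalization correct.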