From 41d2e494937715d3150e5c75d01f0e75ae899337 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2009 17:05:44 +0100 Subject: hrtimer: Tune hrtimer_interrupt hang logic The hrtimer_interrupt hang logic adjusts min_delta_ns based on the execution time of the hrtimer callbacks. This is error-prone for virtual machines, where a guest vcpu can be scheduled out during the execution of the callbacks (and the callbacks themselves can do operations that translate to blocking operations in the hypervisor), which in can lead to large min_delta_ns rendering the system unusable. Replace the current heuristics with something more reliable. Allow the interrupt code to try 3 times to catch up with the lost time. If that fails use the total time spent in the interrupt handler to defer the next timer interrupt so the system can catch up with other things which got delayed. Limit that deferment to 100ms. The retry events and the maximum time spent in the interrupt handler are recorded and exposed via /proc/timer_list Inspired by a patch from Marcelo. Reported-by: Michael Tokarev Signed-off-by: Thomas Gleixner Tested-by: Marcelo Tosatti Cc: kvm@vger.kernel.org --- include/linux/hrtimer.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 9bace4b9f4fe..040b6796ab4d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -162,10 +162,11 @@ struct hrtimer_clock_base { * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode - * @check_clocks: Indictator, when set evaluate time source and clock - * event devices whether high resolution mode can be - * activated. - * @nr_events: Total number of timer interrupt events + * @hang_detected: The last hrtimer interrupt detected a hang + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt */ struct hrtimer_cpu_base { spinlock_t lock; @@ -173,7 +174,11 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; + int hang_detected; unsigned long nr_events; + unsigned long nr_retries; + unsigned long nr_hangs; + ktime_t max_hang_time; #endif }; -- cgit v1.2.3 From 5f201907dfe4ad42c44006ddfcec00ed12e59497 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Dec 2009 10:56:29 +0100 Subject: hrtimer: move timer stats helper functions to hrtimer.c There is no reason to make timer_stats_hrtimer_set_start_info and friends visible to the rest of the kernel. So move all of them to hrtimer.c. Also make timer_stats_hrtimer_set_start_info a static inline function so it gets inlined and we avoid another function call. Based on a patch by Thomas Gleixner. Signed-off-by: Heiko Carstens LKML-Reference: <20091210095629.GC4144@osiris.boeblingen.de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 43 ------------------------------------------- kernel/hrtimer.c | 24 ++++++++++++++++++++---- 2 files changed, 20 insertions(+), 47 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 040b6796ab4d..af634e95871d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -440,47 +440,4 @@ extern u64 ktime_divns(const ktime_t kt, s64 div); /* Show pending timers: */ extern void sysrq_timer_list_show(void); -/* - * Timer-statistics info: - */ -#ifdef CONFIG_TIMER_STATS - -extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, - unsigned int timer_flag); - -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ - if (likely(!timer_stats_active)) - return; - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm, 0); -} - -extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, - void *addr); - -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ - __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0)); -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ - timer->start_site = NULL; -} -#else -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ -} - -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ -} -#endif - #endif diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 931a4d99bc55..d2f9239dc6ba 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -756,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -#ifdef CONFIG_TIMER_STATS -void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) +static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { +#ifdef CONFIG_TIMER_STATS if (timer->start_site) return; - - timer->start_site = addr; + timer->start_site = __builtin_return_address(0); memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); timer->start_pid = current->pid; +#endif } + +static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) +{ +#ifdef CONFIG_TIMER_STATS + timer->start_site = NULL; #endif +} + +static inline void timer_stats_account_hrtimer(struct hrtimer *timer) +{ +#ifdef CONFIG_TIMER_STATS + if (likely(!timer_stats_active)) + return; + timer_stats_update_stats(timer, timer->start_pid, timer->start_site, + timer->function, timer->start_comm, 0); +#endif +} /* * Counterpart to lock_hrtimer_base above: -- cgit v1.2.3 From ecb49d1a639acbacfc3771cae5ec07bed5df3847 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Nov 2009 16:36:54 +0100 Subject: hrtimers: Convert to raw_spinlocks Convert locks which cannot be sleeping locks in preempt-rt to raw_spinlocks. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Acked-by: Ingo Molnar --- include/linux/hrtimer.h | 2 +- kernel/hrtimer.c | 50 +++++++++++++++++++++++------------------------ kernel/time/timer_list.c | 6 +++--- kernel/time/timer_stats.c | 17 ++++++++-------- 4 files changed, 38 insertions(+), 37 deletions(-) (limited to 'include/linux/hrtimer.h') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index af634e95871d..5d86fb2309d2 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -169,7 +169,7 @@ struct hrtimer_clock_base { * @max_hang_time: Maximum time spent in hrtimer_interrupt */ struct hrtimer_cpu_base { - spinlock_t lock; + raw_spinlock_t lock; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d2f9239dc6ba..0086628b6e97 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -127,11 +127,11 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, for (;;) { base = timer->base; if (likely(base != NULL)) { - spin_lock_irqsave(&base->cpu_base->lock, *flags); + raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); if (likely(base == timer->base)) return base; /* The timer has migrated to another CPU: */ - spin_unlock_irqrestore(&base->cpu_base->lock, *flags); + raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); } cpu_relax(); } @@ -208,13 +208,13 @@ again: /* See the comment in lock_timer_base() */ timer->base = NULL; - spin_unlock(&base->cpu_base->lock); - spin_lock(&new_base->cpu_base->lock); + raw_spin_unlock(&base->cpu_base->lock); + raw_spin_lock(&new_base->cpu_base->lock); if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { cpu = this_cpu; - spin_unlock(&new_base->cpu_base->lock); - spin_lock(&base->cpu_base->lock); + raw_spin_unlock(&new_base->cpu_base->lock); + raw_spin_lock(&base->cpu_base->lock); timer->base = base; goto again; } @@ -230,7 +230,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) { struct hrtimer_clock_base *base = timer->base; - spin_lock_irqsave(&base->cpu_base->lock, *flags); + raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); return base; } @@ -628,12 +628,12 @@ static void retrigger_next_event(void *arg) base = &__get_cpu_var(hrtimer_bases); /* Adjust CLOCK_REALTIME offset */ - spin_lock(&base->lock); + raw_spin_lock(&base->lock); base->clock_base[CLOCK_REALTIME].offset = timespec_to_ktime(realtime_offset); hrtimer_force_reprogram(base, 0); - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); } /* @@ -694,9 +694,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { if (wakeup) { - spin_unlock(&base->cpu_base->lock); + raw_spin_unlock(&base->cpu_base->lock); raise_softirq_irqoff(HRTIMER_SOFTIRQ); - spin_lock(&base->cpu_base->lock); + raw_spin_lock(&base->cpu_base->lock); } else __raise_softirq_irqoff(HRTIMER_SOFTIRQ); @@ -790,7 +790,7 @@ static inline void timer_stats_account_hrtimer(struct hrtimer *timer) static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) { - spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); + raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); } /** @@ -1123,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void) unsigned long flags; int i; - spin_lock_irqsave(&cpu_base->lock, flags); + raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!hrtimer_hres_active()) { for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { @@ -1140,7 +1140,7 @@ ktime_t hrtimer_get_next_event(void) } } - spin_unlock_irqrestore(&cpu_base->lock, flags); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); if (mindelta.tv64 < 0) mindelta.tv64 = 0; @@ -1222,11 +1222,11 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) * they get migrated to another cpu, therefore its safe to unlock * the timer base. */ - spin_unlock(&cpu_base->lock); + raw_spin_unlock(&cpu_base->lock); trace_hrtimer_expire_entry(timer, now); restart = fn(timer); trace_hrtimer_expire_exit(timer); - spin_lock(&cpu_base->lock); + raw_spin_lock(&cpu_base->lock); /* * Note: We clear the CALLBACK bit after enqueue_hrtimer and @@ -1261,7 +1261,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) retry: expires_next.tv64 = KTIME_MAX; - spin_lock(&cpu_base->lock); + raw_spin_lock(&cpu_base->lock); /* * We set expires_next to KTIME_MAX here with cpu_base->lock * held to prevent that a timer is enqueued in our queue via @@ -1317,7 +1317,7 @@ retry: * against it. */ cpu_base->expires_next = expires_next; - spin_unlock(&cpu_base->lock); + raw_spin_unlock(&cpu_base->lock); /* Reprogramming necessary ? */ if (expires_next.tv64 == KTIME_MAX || @@ -1457,7 +1457,7 @@ void hrtimer_run_queues(void) gettime = 0; } - spin_lock(&cpu_base->lock); + raw_spin_lock(&cpu_base->lock); while ((node = base->first)) { struct hrtimer *timer; @@ -1469,7 +1469,7 @@ void hrtimer_run_queues(void) __run_hrtimer(timer, &base->softirq_time); } - spin_unlock(&cpu_base->lock); + raw_spin_unlock(&cpu_base->lock); } } @@ -1625,7 +1625,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; - spin_lock_init(&cpu_base->lock); + raw_spin_lock_init(&cpu_base->lock); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; @@ -1683,16 +1683,16 @@ static void migrate_hrtimers(int scpu) * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock(&new_base->lock); - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&new_base->lock); + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); + raw_spin_unlock(&old_base->lock); + raw_spin_unlock(&new_base->lock); /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 9d80db4747d4..28265636b6c2 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, next_one: i = 0; - spin_lock_irqsave(&base->cpu_base->lock, flags); + raw_spin_lock_irqsave(&base->cpu_base->lock, flags); curr = base->first; /* @@ -100,13 +100,13 @@ next_one: timer = rb_entry(curr, struct hrtimer, node); tmp = *timer; - spin_unlock_irqrestore(&base->cpu_base->lock, flags); + raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); print_timer(m, timer, &tmp, i, now); next++; goto next_one; } - spin_unlock_irqrestore(&base->cpu_base->lock, flags); + raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); } static void diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 63b117e9eba1..2f3b585b8d7d 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock); /* * Per-CPU lookup locks for fast hash lookup: */ -static DEFINE_PER_CPU(spinlock_t, tstats_lookup_lock); +static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock); /* * Mutex to serialize state changes with show-stats activities: @@ -238,7 +238,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, /* * It doesnt matter which lock we take: */ - spinlock_t *lock; + raw_spinlock_t *lock; struct entry *entry, input; unsigned long flags; @@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, input.pid = pid; input.timer_flag = timer_flag; - spin_lock_irqsave(lock, flags); + raw_spin_lock_irqsave(lock, flags); if (!timer_stats_active) goto out_unlock; @@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, atomic_inc(&overflow_count); out_unlock: - spin_unlock_irqrestore(lock, flags); + raw_spin_unlock_irqrestore(lock, flags); } static void print_name_offset(struct seq_file *m, unsigned long addr) @@ -348,10 +348,11 @@ static void sync_access(void) int cpu; for_each_online_cpu(cpu) { - spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); - spin_lock_irqsave(lock, flags); + raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); + + raw_spin_lock_irqsave(lock, flags); /* nothing */ - spin_unlock_irqrestore(lock, flags); + raw_spin_unlock_irqrestore(lock, flags); } } @@ -409,7 +410,7 @@ void __init init_timer_stats(void) int cpu; for_each_possible_cpu(cpu) - spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); + raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); } static int __init init_tstats_procfs(void) -- cgit v1.2.3