From 41d2e494937715d3150e5c75d01f0e75ae899337 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Nov 2009 17:05:44 +0100 Subject: hrtimer: Tune hrtimer_interrupt hang logic The hrtimer_interrupt hang logic adjusts min_delta_ns based on the execution time of the hrtimer callbacks. This is error-prone for virtual machines, where a guest vcpu can be scheduled out during the execution of the callbacks (and the callbacks themselves can do operations that translate to blocking operations in the hypervisor), which in turn can lead to large min_delta_ns rendering the system unusable. Replace the current heuristics with something more reliable. Allow the interrupt code to try 3 times to catch up with the lost time. If that fails use the total time spent in the interrupt handler to defer the next timer interrupt so the system can catch up with other things which got delayed. Limit that deferment to 100ms. The retry events and the maximum time spent in the interrupt handler are recorded and exposed via /proc/timer_list Inspired by a patch from Marcelo. Reported-by: Michael Tokarev Signed-off-by: Thomas Gleixner Tested-by: Marcelo Tosatti Cc: kvm@vger.kernel.org --- include/linux/hrtimer.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 9bace4b9f4fe..040b6796ab4d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -162,10 +162,11 @@ struct hrtimer_clock_base { * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode - * @check_clocks: Indictator, when set evaluate time source and clock - * event devices whether high resolution mode can be - * activated. 
- * @nr_events: Total number of timer interrupt events + * @hang_detected: The last hrtimer interrupt detected a hang + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt */ struct hrtimer_cpu_base { spinlock_t lock; @@ -173,7 +174,11 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; + int hang_detected; unsigned long nr_events; + unsigned long nr_retries; + unsigned long nr_hangs; + ktime_t max_hang_time; #endif }; -- cgit v1.2.3 From 5f201907dfe4ad42c44006ddfcec00ed12e59497 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Dec 2009 10:56:29 +0100 Subject: hrtimer: move timer stats helper functions to hrtimer.c There is no reason to make timer_stats_hrtimer_set_start_info and friends visible to the rest of the kernel. So move all of them to hrtimer.c. Also make timer_stats_hrtimer_set_start_info a static inline function so it gets inlined and we avoid another function call. Based on a patch by Thomas Gleixner. 
Signed-off-by: Heiko Carstens LKML-Reference: <20091210095629.GC4144@osiris.boeblingen.de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 43 ------------------------------------------- kernel/hrtimer.c | 24 ++++++++++++++++++++---- 2 files changed, 20 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 040b6796ab4d..af634e95871d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -440,47 +440,4 @@ extern u64 ktime_divns(const ktime_t kt, s64 div); /* Show pending timers: */ extern void sysrq_timer_list_show(void); -/* - * Timer-statistics info: - */ -#ifdef CONFIG_TIMER_STATS - -extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, - unsigned int timer_flag); - -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ - if (likely(!timer_stats_active)) - return; - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm, 0); -} - -extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, - void *addr); - -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ - __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0)); -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ - timer->start_site = NULL; -} -#else -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ -} - -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ -} -#endif - #endif diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 931a4d99bc55..d2f9239dc6ba 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -756,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -#ifdef CONFIG_TIMER_STATS 
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr) +static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { +#ifdef CONFIG_TIMER_STATS if (timer->start_site) return; - - timer->start_site = addr; + timer->start_site = __builtin_return_address(0); memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); timer->start_pid = current->pid; +#endif } + +static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) +{ +#ifdef CONFIG_TIMER_STATS + timer->start_site = NULL; #endif +} + +static inline void timer_stats_account_hrtimer(struct hrtimer *timer) +{ +#ifdef CONFIG_TIMER_STATS + if (likely(!timer_stats_active)) + return; + timer_stats_update_stats(timer, timer->start_pid, timer->start_site, + timer->function, timer->start_comm, 0); +#endif +} /* * Counterpart to lock_hrtimer_base above: -- cgit v1.2.3 From e9c0748b687aa70179a9e6d8ffc24b2874fe350b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Dec 2009 13:23:19 +0100 Subject: itimer: Fix the itimer trace print format Compiling powerpc64 results in: include/trace/events/timer.h:279: warning: format '%lu' expects type 'long unsigned int', but argument 4 has type 'cputime_t' .... cputime_t on power is u64, which triggers the above warning. Cast the cputime_t to unsigned long long and fix the print format string. That works on both 32 and 64 bit architectures. While at it change the print format for long variables from %lu to %ld. 
Signed-off-by: Thomas Gleixner Cc: Xiao Guangrong --- include/trace/events/timer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index e5ce87a0498d..9496b965d62a 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -301,8 +301,8 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", - __entry->which, __entry->expires, + TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld", + __entry->which, (unsigned long long)__entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) ); @@ -331,8 +331,8 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which=%d pid=%d now=%lu", __entry->which, - (int) __entry->pid, __entry->now) + TP_printk("which=%d pid=%d now=%llu", __entry->which, + (int) __entry->pid, (unsigned long long)__entry->now) ); #endif /* _TRACE_TIMER_H */ -- cgit v1.2.3