From ea8117478918a4734586d35ff530721b682425be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Sep 2013 12:43:13 +0200 Subject: sched, idle: Fix the idle polling state logic Mike reported that commit 7d1a9417 ("x86: Use generic idle loop") regressed several workloads and caused excessive reschedule interrupts. The patch in question failed to notice that the x86 code had an inverted sense of the polling state versus the new generic code (x86: default polling, generic: default !polling). Fix the two prominent x86 mwait based idle drivers and introduce a few new generic polling helpers (fixing the wrong smp_mb__after_clear_bit usage). Also switch the idle routines to using tif_need_resched() which is an immediate TIF_NEED_RESCHED test as opposed to need_resched which will end up being slightly different. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: lenb@kernel.org Cc: tglx@linutronix.de Link: http://lkml.kernel.org/n/tip-nc03imb0etuefmzybzj7sprf@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c83516be1052..3fb8d95ab8b5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -391,9 +391,9 @@ static void amd_e400_idle(void) * The switch back from broadcast mode needs to be * called with interrupts disabled. */ - local_irq_disable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); - local_irq_enable(); + local_irq_disable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + local_irq_enable(); } else default_idle(); } -- cgit v1.2.3 From bdb43806589096ac4272fe1307e789846ac08d7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Sep 2013 12:15:23 +0200 Subject: sched: Extract the basic add/sub preempt_count modifiers Rewrite the preempt_count macros in order to extract the 3 basic preempt_count value modifiers: __preempt_count_add() __preempt_count_sub() and the new: __preempt_count_dec_and_test() And since we're at it anyway, replace the unconventional $op_preempt_count names with the more conventional preempt_count_$op. Since these basic operators are equivalent to the previous _notrace() variants, do away with the _notrace() versions. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ewbpdbupy9xpsjhg960zwbv8@git.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/mm/init.c | 5 +- arch/x86/kernel/traps.c | 4 +- include/asm-generic/preempt.h | 35 ++++++++++++++ include/linux/hardirq.h | 8 ++-- include/linux/preempt.h | 106 +++++++++++++++++++----------------------- include/linux/sched.h | 5 -- include/linux/uaccess.h | 8 +--- kernel/context_tracking.c | 2 +- kernel/sched/core.c | 29 +++++------- kernel/softirq.c | 14 +++--- 10 files changed, 113 insertions(+), 103 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index e205ef598e97..12156176c7ca 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -124,7 +124,7 @@ void *kmap_coherent(struct page *page, unsigned long addr) BUG_ON(Page_dcache_dirty(page)); - inc_preempt_count(); + pagefault_disable(); idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); #ifdef CONFIG_MIPS_MT_SMTC idx += FIX_N_COLOURS * smp_processor_id() + @@ -193,8 +193,7 @@ void kunmap_coherent(void) write_c0_entryhi(old_ctx); EXIT_CRITICAL(flags); #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } void copy_user_highpage(struct page *to, struct page *from, diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8c8093b146ca..729aa779ff75 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -88,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs) static inline void preempt_conditional_sti(struct pt_regs *regs) { - inc_preempt_count(); + preempt_count_inc(); if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -103,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - dec_preempt_count(); + preempt_count_dec(); } static int __kprobes diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 8100b1ec1715..82d958fc3823 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -65,4 +65,39 @@ static __always_inline bool test_preempt_need_resched(void) return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED); } +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + *preempt_count_ptr() += val; +} + +static __always_inline void __preempt_count_sub(int val) +{ + *preempt_count_ptr() -= val; +} + +static __always_inline bool __preempt_count_dec_and_test(void) +{ + return !--*preempt_count_ptr(); +} + +/* + * Returns true when we need to resched -- even if we can not. + */ +static __always_inline bool need_resched(void) +{ + return unlikely(test_preempt_need_resched()); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). + */ +static __always_inline bool should_resched(void) +{ + return unlikely(!*preempt_count_ptr()); +} + #endif /* __ASM_PREEMPT_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 1e041063b226..d9cf963ac832 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -33,7 +33,7 @@ extern void rcu_nmi_exit(void); #define __irq_enter() \ do { \ account_irq_enter_time(current); \ - add_preempt_count(HARDIRQ_OFFSET); \ + preempt_count_add(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -49,7 +49,7 @@ extern void irq_enter(void); do { \ trace_hardirq_exit(); \ account_irq_exit_time(current); \ - sub_preempt_count(HARDIRQ_OFFSET); \ + preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) /* @@ -62,7 +62,7 @@ extern void irq_exit(void); lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ - add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ } while (0) @@ -72,7 +72,7 @@ extern void irq_exit(void); trace_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ - sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ } while (0) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index df8e245e8729..2343d8715299 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -18,97 +18,86 @@ #include #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) - extern void add_preempt_count(int val); - extern void sub_preempt_count(int val); +extern void preempt_count_add(int val); +extern void preempt_count_sub(int val); +#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) #else -# define add_preempt_count(val) do { *preempt_count_ptr() += (val); } while (0) -# define sub_preempt_count(val) do { *preempt_count_ptr() -= (val); } while (0) +#define preempt_count_add(val) __preempt_count_add(val) +#define preempt_count_sub(val) __preempt_count_sub(val) +#define preempt_count_dec_and_test() __preempt_count_dec_and_test() #endif -#define inc_preempt_count() add_preempt_count(1) -#define dec_preempt_count() sub_preempt_count(1) - -#ifdef CONFIG_PREEMPT - -asmlinkage void preempt_schedule(void); - -#define preempt_check_resched() \ -do { \ - if (unlikely(!*preempt_count_ptr())) \ - preempt_schedule(); \ -} while (0) - -#ifdef CONFIG_CONTEXT_TRACKING - -void preempt_schedule_context(void); - -#define preempt_check_resched_context() \ -do { \ - if (unlikely(!*preempt_count_ptr())) \ - preempt_schedule_context(); \ -} while (0) -#else - -#define preempt_check_resched_context() preempt_check_resched() - -#endif /* CONFIG_CONTEXT_TRACKING */ - -#else /* !CONFIG_PREEMPT */ - -#define preempt_check_resched() do { } while (0) -#define preempt_check_resched_context() do { } while (0) - -#endif /* CONFIG_PREEMPT */ +#define __preempt_count_inc() __preempt_count_add(1) +#define __preempt_count_dec() __preempt_count_sub(1) +#define preempt_count_inc() preempt_count_add(1) +#define preempt_count_dec() preempt_count_sub(1) #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ do { \ - inc_preempt_count(); \ + preempt_count_inc(); \ barrier(); \ } while (0) #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ - dec_preempt_count(); \ + preempt_count_dec(); \ } while (0) -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#ifdef CONFIG_PREEMPT +asmlinkage void preempt_schedule(void); #define preempt_enable() \ do { \ - preempt_enable_no_resched(); \ - preempt_check_resched(); \ + barrier(); \ + if (unlikely(preempt_count_dec_and_test())) \ + preempt_schedule(); \ } while (0) -/* For debugging and tracer internals only! */ -#define add_preempt_count_notrace(val) \ - do { *preempt_count_ptr() += (val); } while (0) -#define sub_preempt_count_notrace(val) \ - do { *preempt_count_ptr() -= (val); } while (0) -#define inc_preempt_count_notrace() add_preempt_count_notrace(1) -#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) +#define preempt_check_resched() \ +do { \ + if (should_resched()) \ + preempt_schedule(); \ +} while (0) + +#else +#define preempt_enable() preempt_enable_no_resched() +#define preempt_check_resched() do { } while (0) +#endif #define preempt_disable_notrace() \ do { \ - inc_preempt_count_notrace(); \ + __preempt_count_inc(); \ barrier(); \ } while (0) #define preempt_enable_no_resched_notrace() \ do { \ barrier(); \ - dec_preempt_count_notrace(); \ + __preempt_count_dec(); \ } while (0) -/* preempt_check_resched is OK to trace */ +#ifdef CONFIG_PREEMPT + +#ifdef CONFIG_CONTEXT_TRACKING +asmlinkage void preempt_schedule_context(void); +#else +#define preempt_schedule_context() preempt_schedule() +#endif + #define preempt_enable_notrace() \ do { \ - preempt_enable_no_resched_notrace(); \ - preempt_check_resched_context(); \ + barrier(); \ + if (unlikely(__preempt_count_dec_and_test())) \ + preempt_schedule_context(); \ } while (0) +#else +#define preempt_enable_notrace() preempt_enable_no_resched_notrace() +#endif #else /* !CONFIG_PREEMPT_COUNT */ @@ -118,10 +107,11 @@ do { \ * that can cause faults and scheduling migrate into our preempt-protected * region. */ -#define preempt_disable() barrier() +#define preempt_disable() barrier() #define sched_preempt_enable_no_resched() barrier() -#define preempt_enable_no_resched() barrier() -#define preempt_enable() barrier() +#define preempt_enable_no_resched() barrier() +#define preempt_enable() barrier() +#define preempt_check_resched() do { } while (0) #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() diff --git a/include/linux/sched.h b/include/linux/sched.h index 9fa151fb968e..06ac17c7e639 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2409,11 +2409,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } -static inline int need_resched(void) -{ - return unlikely(test_preempt_need_resched()); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5ca0951e1855..9d8cf056e661 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -15,7 +15,7 @@ */ static inline void pagefault_disable(void) { - inc_preempt_count(); + preempt_count_inc(); /* * make sure to have issued the store before a pagefault * can hit. @@ -30,11 +30,7 @@ static inline void pagefault_enable(void) * the pagefault handler again. */ barrier(); - dec_preempt_count(); - /* - * make sure we do.. - */ - barrier(); + preempt_count_dec(); preempt_check_resched(); } diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 247091bf0587..013161f1c807 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -111,7 +111,7 @@ void context_tracking_user_enter(void) * instead of preempt_schedule() to exit user context if needed before * calling the scheduler. */ -void __sched notrace preempt_schedule_context(void) +asmlinkage void __sched notrace preempt_schedule_context(void) { enum ctx_state prev_ctx; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0ba4e4192390..9c84a9ab1892 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2219,7 +2219,7 @@ notrace unsigned long get_parent_ip(unsigned long addr) #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ defined(CONFIG_PREEMPT_TRACER)) -void __kprobes add_preempt_count(int val) +void __kprobes preempt_count_add(int val) { #ifdef CONFIG_DEBUG_PREEMPT /* @@ -2228,7 +2228,7 @@ void __kprobes add_preempt_count(int val) if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) return; #endif - add_preempt_count_notrace(val); + __preempt_count_add(val); #ifdef CONFIG_DEBUG_PREEMPT /* * Spinlock count overflowing soon? @@ -2239,9 +2239,9 @@ void __kprobes add_preempt_count(int val) if (preempt_count() == val) trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); } -EXPORT_SYMBOL(add_preempt_count); +EXPORT_SYMBOL(preempt_count_add); -void __kprobes sub_preempt_count(int val) +void __kprobes preempt_count_sub(int val) { #ifdef CONFIG_DEBUG_PREEMPT /* @@ -2259,9 +2259,9 @@ void __kprobes sub_preempt_count(int val) if (preempt_count() == val) trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); - sub_preempt_count_notrace(val); + __preempt_count_sub(val); } -EXPORT_SYMBOL(sub_preempt_count); +EXPORT_SYMBOL(preempt_count_sub); #endif @@ -2525,9 +2525,9 @@ asmlinkage void __sched notrace preempt_schedule(void) return; do { - add_preempt_count_notrace(PREEMPT_ACTIVE); + __preempt_count_add(PREEMPT_ACTIVE); __schedule(); - sub_preempt_count_notrace(PREEMPT_ACTIVE); + __preempt_count_sub(PREEMPT_ACTIVE); /* * Check again in case we missed a preemption opportunity @@ -2554,11 +2554,11 @@ asmlinkage void __sched preempt_schedule_irq(void) prev_state = exception_enter(); do { - add_preempt_count(PREEMPT_ACTIVE); + __preempt_count_add(PREEMPT_ACTIVE); local_irq_enable(); __schedule(); local_irq_disable(); - sub_preempt_count(PREEMPT_ACTIVE); + __preempt_count_sub(PREEMPT_ACTIVE); /* * Check again in case we missed a preemption opportunity @@ -3798,16 +3798,11 @@ SYSCALL_DEFINE0(sched_yield) return 0; } -static inline int should_resched(void) -{ - return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); -} - static void __cond_resched(void) { - add_preempt_count(PREEMPT_ACTIVE); + __preempt_count_add(PREEMPT_ACTIVE); __schedule(); - sub_preempt_count(PREEMPT_ACTIVE); + __preempt_count_sub(PREEMPT_ACTIVE); } int __sched _cond_resched(void) diff --git a/kernel/softirq.c b/kernel/softirq.c index a90de70cf1f3..3e88612fc87e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -100,13 +100,13 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt) raw_local_irq_save(flags); /* - * The preempt tracer hooks into add_preempt_count and will break + * The preempt tracer hooks into preempt_count_add and will break * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET * is set and before current->softirq_enabled is cleared. * We must manually increment preempt_count here and manually * call the trace_preempt_off later. */ - add_preempt_count_notrace(cnt); + __preempt_count_add(cnt); /* * Were softirqs turned off above: */ @@ -120,7 +120,7 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt) #else /* !CONFIG_TRACE_IRQFLAGS */ static inline void __local_bh_disable(unsigned long ip, unsigned int cnt) { - add_preempt_count(cnt); + preempt_count_add(cnt); barrier(); } #endif /* CONFIG_TRACE_IRQFLAGS */ @@ -139,7 +139,7 @@ static void __local_bh_enable(unsigned int cnt) if (softirq_count() == cnt) trace_softirqs_on(_RET_IP_); - sub_preempt_count(cnt); + preempt_count_sub(cnt); } /* @@ -169,12 +169,12 @@ static inline void _local_bh_enable_ip(unsigned long ip) * Keep preemption disabled until we are done with * softirq processing: */ - sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1); + preempt_count_sub(SOFTIRQ_DISABLE_OFFSET - 1); if (unlikely(!in_interrupt() && local_softirq_pending())) do_softirq(); - dec_preempt_count(); + preempt_count_dec(); #ifdef CONFIG_TRACE_IRQFLAGS local_irq_enable(); #endif @@ -360,7 +360,7 @@ void irq_exit(void) account_irq_exit_time(current); trace_hardirq_exit(); - sub_preempt_count(HARDIRQ_OFFSET); + preempt_count_sub(HARDIRQ_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -- cgit v1.2.3 From c2daa3bed53a81171cf8c1a36db798e82b91afe8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:51:00 +0200 Subject: sched, x86: Provide a per-cpu preempt_count implementation Convert x86 to use a per-cpu preemption count. The reason for doing so is that accessing per-cpu variables is a lot cheaper than accessing thread_info variables. We still need to save/restore the actual preemption count due to PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the same cache-line as the other hot __switch_to() variables such as current_task. NOTE: this save/restore is required even for !PREEMPT kernels as cond_resched() also relies on preempt_count's PREEMPT_ACTIVE to ignore task_struct::state. Also rename thread_info::preempt_count to ensure nobody is 'accidentally' still poking at it. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-gzn5rfsf8trgjoqx8hyayy3q@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/preempt.h | 98 ++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/thread_info.h | 5 +- arch/x86/kernel/asm-offsets.c | 1 - arch/x86/kernel/cpu/common.c | 5 ++ arch/x86/kernel/entry_32.S | 7 +-- arch/x86/kernel/entry_64.S | 4 +- arch/x86/kernel/irq_32.c | 4 -- arch/x86/kernel/process_32.c | 8 ++++ arch/x86/kernel/process_64.c | 8 ++++ 10 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/preempt.h (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index eca20286a91c..7f669853317a 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,4 +5,3 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h -generic-y += preempt.h diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h new file mode 100644 index 000000000000..1309942b95e5 --- /dev/null +++ b/arch/x86/include/asm/preempt.h @@ -0,0 +1,98 @@ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include +#include +#include + +DECLARE_PER_CPU(int, __preempt_count); + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ +static __always_inline int preempt_count(void) +{ + return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void preempt_count_set(int pc) +{ + __this_cpu_write_4(__preempt_count, pc); +} + +/* + * must be macros to avoid header recursion hell + */ +#define task_preempt_count(p) \ + (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED) + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ +} while (0) + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); +} + +static __always_inline void clear_preempt_need_resched(void) +{ + __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); +} + +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + __this_cpu_add_4(__preempt_count, val); +} + +static __always_inline void __preempt_count_sub(int val) +{ + __this_cpu_add_4(__preempt_count, -val); +} + +static __always_inline bool __preempt_count_dec_and_test(void) +{ + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); +} + +/* + * Returns true when we need to resched -- even if we can not. + */ +static __always_inline bool need_resched(void) +{ + return unlikely(test_preempt_need_resched()); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). + */ +static __always_inline bool should_resched(void) +{ + return unlikely(!__this_cpu_read_4(__preempt_count)); +} + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 27811190cbd7..c46a46be1ec6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -28,8 +28,7 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ + int saved_preempt_count; mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; @@ -49,7 +48,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = INIT_PREEMPT_COUNT, \ + .saved_preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 28610822fb3c..9f6b9341950f 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -32,7 +32,6 @@ void common(void) { OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_preempt_count, thread_info, preempt_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2793d1f095a2..5223fe6dec7b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); + DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); /* @@ -1169,6 +1172,8 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0dcb0ceb6a2..fd1bc1b15e6d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -362,12 +362,9 @@ END(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all need_resched: - movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all + cmpl $0,PER_CPU_VAR(__preempt_count) + jnz restore_all testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1b69951a81e2..6a43e7d29fe7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1118,10 +1118,8 @@ retint_signal: /* Returning to kernel space. Check if we need preemption */ /* rcx: threadinfo. interrupts off. */ ENTRY(retint_kernel) - cmpl $0,TI_preempt_count(%rcx) + cmpl $0,PER_CPU_VAR(__preempt_count) jnz retint_restore_args - bt $TIF_NEED_RESCHED,TI_flags(%rcx) - jnc retint_restore_args bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args call preempt_schedule_irq diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4186755f1d7c..3fe066359ac0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; - /* Copy the preempt_count so that the [soft]irq checks work. */ - irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count; - if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -131,7 +128,6 @@ void irq_ctx_init(int cpu) THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); per_cpu(hardirq_ctx, cpu) = irqctx; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 884f98f69354..c2ec1aa6d454 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -291,6 +291,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + /* * Now maybe handle debug registers and/or IO bitmaps */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bb1dc51bab05..45ab4d6fc8a7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(old_rsp, next->usersp); this_cpu_write(current_task, next_p); + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + this_cpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -- cgit v1.2.3 From 1a338ac32ca630f67df25b4a16436cccc314e997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:51:00 +0200 Subject: sched, x86: Optimize the preempt_schedule() call Remove the bloat of the C calling convention out of the preempt_enable() sites by creating an ASM wrapper which allows us to do an asm("call ___preempt_schedule") instead. calling.h bits by Andi Kleen Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-tk7xdi1cvvxewixzke8t8le1@git.kernel.org [ Fixed build error. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calling.h | 50 ++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/preempt.h | 10 ++++++++ arch/x86/kernel/Makefile | 2 ++ arch/x86/kernel/i386_ksyms_32.c | 7 ++++++ arch/x86/kernel/preempt.S | 25 ++++++++++++++++++++ arch/x86/kernel/x8664_ksyms_64.c | 7 ++++++ include/asm-generic/preempt.h | 10 ++++++++ include/linux/preempt.h | 13 ++++------- 8 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 arch/x86/kernel/preempt.S (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 0fa675033912..cb4c73bfeb48 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with #include +#ifdef CONFIG_X86_64 + /* * 64-bit system call stack frame layout defines and helpers, * for assembly code: @@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with .macro icebp .byte 0xf1 .endm + +#else /* CONFIG_X86_64 */ + +/* + * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These + * are different from the entry_32.S versions in not changing the segment + * registers. So only suitable for in kernel use, not when transitioning + * from or to user space. The resulting stack frame is not a standard + * pt_regs frame. The main use case is calling C code from assembler + * when all the registers need to be preserved. + */ + + .macro SAVE_ALL + pushl_cfi %eax + CFI_REL_OFFSET eax, 0 + pushl_cfi %ebp + CFI_REL_OFFSET ebp, 0 + pushl_cfi %edi + CFI_REL_OFFSET edi, 0 + pushl_cfi %esi + CFI_REL_OFFSET esi, 0 + pushl_cfi %edx + CFI_REL_OFFSET edx, 0 + pushl_cfi %ecx + CFI_REL_OFFSET ecx, 0 + pushl_cfi %ebx + CFI_REL_OFFSET ebx, 0 + .endm + + .macro RESTORE_ALL + popl_cfi %ebx + CFI_RESTORE ebx + popl_cfi %ecx + CFI_RESTORE ecx + popl_cfi %edx + CFI_RESTORE edx + popl_cfi %esi + CFI_RESTORE esi + popl_cfi %edi + CFI_RESTORE edi + popl_cfi %ebp + CFI_RESTORE ebp + popl_cfi %eax + CFI_RESTORE eax + .endm + +#endif /* CONFIG_X86_64 */ + diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 1309942b95e5..1de41690ff99 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -95,4 +95,14 @@ static __always_inline bool should_resched(void) return unlikely(!__this_cpu_read_4(__preempt_count)); } +#ifdef CONFIG_PREEMPT + extern asmlinkage void ___preempt_schedule(void); +# define __preempt_schedule() asm ("call ___preempt_schedule") + extern asmlinkage void preempt_schedule(void); +# ifdef CONFIG_CONTEXT_TRACKING + extern asmlinkage void ___preempt_schedule_context(void); +# define __preempt_schedule_context() asm ("call ___preempt_schedule_context") +# endif +#endif + #endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a5408b965c9d..9b0a34e2cd79 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,6 +36,8 @@ obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-$(CONFIG_PREEMPT) += preempt.o + obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 0fa69127209a..05fd74f537d6 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(___preempt_schedule); +#ifdef CONFIG_CONTEXT_TRACKING +EXPORT_SYMBOL(___preempt_schedule_context); +#endif +#endif diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S new file mode 100644 index 000000000000..ca7f0d58a87d --- /dev/null +++ b/arch/x86/kernel/preempt.S @@ -0,0 +1,25 @@ + +#include +#include +#include +#include + +ENTRY(___preempt_schedule) + CFI_STARTPROC + SAVE_ALL + call preempt_schedule + RESTORE_ALL + ret + CFI_ENDPROC + +#ifdef CONFIG_CONTEXT_TRACKING + +ENTRY(___preempt_schedule_context) + CFI_STARTPROC + SAVE_ALL + call preempt_schedule_context + RESTORE_ALL + ret + CFI_ENDPROC + +#endif diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index b014d9414d08..040681928e9d 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page); #ifndef CONFIG_PARAVIRT EXPORT_SYMBOL(native_load_gs_index); #endif + +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(___preempt_schedule); +#ifdef CONFIG_CONTEXT_TRACKING +EXPORT_SYMBOL(___preempt_schedule_context); +#endif +#endif diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 82d958fc3823..5dc14ed3791c 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -100,4 +100,14 @@ static __always_inline bool should_resched(void) return unlikely(!*preempt_count_ptr()); } +#ifdef CONFIG_PREEMPT +extern asmlinkage void preempt_schedule(void); +#define __preempt_schedule() preempt_schedule() + +#ifdef CONFIG_CONTEXT_TRACKING +extern asmlinkage void preempt_schedule_context(void); +#define __preempt_schedule_context() preempt_schedule_context() +#endif +#endif /* CONFIG_PREEMPT */ + #endif /* __ASM_PREEMPT_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 2343d8715299..a3d9dc8c2c00 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -50,18 +50,17 @@ do { \ #define preempt_enable_no_resched() sched_preempt_enable_no_resched() #ifdef CONFIG_PREEMPT -asmlinkage void preempt_schedule(void); #define preempt_enable() \ do { \ barrier(); \ if (unlikely(preempt_count_dec_and_test())) \ - preempt_schedule(); \ + __preempt_schedule(); \ } while (0) #define preempt_check_resched() \ do { \ if (should_resched()) \ - preempt_schedule(); \ + __preempt_schedule(); \ } while (0) #else @@ -83,17 +82,15 @@ do { \ #ifdef CONFIG_PREEMPT -#ifdef CONFIG_CONTEXT_TRACKING -asmlinkage void preempt_schedule_context(void); -#else -#define preempt_schedule_context() preempt_schedule() +#ifndef CONFIG_CONTEXT_TRACKING +#define __preempt_schedule_context() __preempt_schedule() #endif #define preempt_enable_notrace() \ do { \ barrier(); \ if (unlikely(__preempt_count_dec_and_test())) \ - preempt_schedule_context(); \ + __preempt_schedule_context(); \ } while (0) #else #define preempt_enable_notrace() preempt_enable_no_resched_notrace() -- cgit v1.2.3