From 9bd721c55c8a886b938a45198aab0ccb52f1f7fa Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 13 Sep 2013 11:26:52 -0700 Subject: sched/balancing: Consider max cost of idle balance per sched domain In this patch, we keep track of the max cost we spend doing idle load balancing for each sched domain. If the avg time the CPU remains idle is less than the time we have already spent on idle balancing + the max cost of idle balancing in the sched domain, then we don't continue to attempt the balance. We also keep a per rq variable, max_idle_balance_cost, which keeps track of the max time spent on newidle load balances throughout all its domains so that we can determine the avg_idle's max value. By using the max, we avoid overrunning the average. This further reduces the chance we attempt balancing when the CPU is not idle for longer than the cost to balance. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379096813-3032-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- arch/metag/include/asm/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h index 23f5118f58db..db192924f4b0 100644 --- a/arch/metag/include/asm/topology.h +++ b/arch/metag/include/asm/topology.h @@ -26,6 +26,7 @@ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ + .max_newidle_lb_cost = 0, \ } #define cpu_to_node(cpu) ((void)(cpu), 0) -- cgit v1.2.3 From f48627e686a69f5215cb0761e731edb3d9859dd9 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 13 Sep 2013 11:26:53 -0700 Subject: sched/balancing: Periodically decay max cost of idle balance This patch builds on patch 2 and periodically decays that max value to do idle balancing per sched domain by approximately 1% per second. Also decay the rq's max_idle_balance_cost value.
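A minimal sketch of the decay arithmetic (the 253/256 factor is the one applied once per second in the rebalance_domains() hunk below; the 500 us starting value is only an assumption for illustration):

	/* illustrative only -- 'cost' stands in for sd->max_newidle_lb_cost, in ns */
	u64 cost = 500000;			/* assume a measured max of 500 us */
	int sec;

	for (sec = 0; sec < 50; sec++)
		cost = (cost * 253) / 256;	/* ~1% decay per second */
	/* after 50 seconds cost has dropped to roughly 277000 ns, about 55% */

so an unrefreshed maximum falls to about half of its value in roughly a minute.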
Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- arch/metag/include/asm/topology.h | 1 + include/linux/sched.h | 3 +++ include/linux/topology.h | 3 +++ kernel/sched/fair.c | 38 +++++++++++++++++++++++++++++++------- 4 files changed, 38 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h index db192924f4b0..8e9c0b3b9691 100644 --- a/arch/metag/include/asm/topology.h +++ b/arch/metag/include/asm/topology.h @@ -27,6 +27,7 @@ .balance_interval = 1, \ .nr_balance_failed = 0, \ .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #define cpu_to_node(cpu) ((void)(cpu), 0) diff --git a/include/linux/sched.h b/include/linux/sched.h index be078ff9157f..b5344de1658b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -810,7 +810,10 @@ struct sched_domain { unsigned int nr_balance_failed; /* initialise to 0 */ u64 last_update; + + /* idle_balance() stats */ u64 max_newidle_lb_cost; + unsigned long next_decay_max_lb_cost; #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ diff --git a/include/linux/topology.h b/include/linux/topology.h index e2a2c3da2929..12ae6ce997d6 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -107,6 +107,7 @@ int arch_update_cpu_topology(void); .balance_interval = 1, \ .smt_gain = 1178, /* 15% */ \ .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #endif #endif /* CONFIG_SCHED_SMT */ @@ -137,6 +138,7 @@ int arch_update_cpu_topology(void); .last_balance = jiffies, \ .balance_interval = 1, \ .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #endif #endif /* CONFIG_SCHED_MC */ @@ -169,6 +171,7 @@ int arch_update_cpu_topology(void); .last_balance = jiffies, \ .balance_interval = 1, \ .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ffc99d8f0a95..2b89cd244b0d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) /* Earliest time when we have to do rebalance again */ unsigned long next_balance = jiffies + 60*HZ; int update_next_balance = 0; - int need_serialize; + int need_serialize, need_decay = 0; + u64 max_cost = 0; update_blocked_averages(cpu); rcu_read_lock(); for_each_domain(cpu, sd) { + /* + * Decay the newidle max times here because this is a regular + * visit to all the domains. Decay ~1% per second. + */ + if (time_after(jiffies, sd->next_decay_max_lb_cost)) { + sd->max_newidle_lb_cost = + (sd->max_newidle_lb_cost * 253) / 256; + sd->next_decay_max_lb_cost = jiffies + HZ; + need_decay = 1; + } + max_cost += sd->max_newidle_lb_cost; + if (!(sd->flags & SD_LOAD_BALANCE)) continue; + /* + * Stop the load balance at this level. There is another + * CPU in our sched group which is doing load balancing more + * actively. + */ + if (!continue_balancing) { + if (need_decay) + continue; + break; + } + interval = sd->balance_interval; if (idle != CPU_IDLE) interval *= sd->busy_factor; @@ -5723,14 +5747,14 @@ out: next_balance = sd->last_balance + interval; update_next_balance = 1; } - + } + if (need_decay) { /* - * Stop the load balance at this level. There is another - * CPU in our sched group which is doing load balancing more - * actively. 
+ * Ensure the rq-wide value also decays but keep it at a + * reasonable floor to avoid funnies with rq->avg_idle. */ - if (!continue_balancing) - break; + rq->max_idle_balance_cost = + max((u64)sysctl_sched_migration_cost, max_cost); } rcu_read_unlock(); -- cgit v1.2.3 From 0c44c2d0f459cd7e275242b72f500137c4fa834d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Sep 2013 15:19:24 +0200 Subject: x86: Use asm goto to implement better modify_and_test() functions Linus suggested using asm goto to get rid of the typical SETcc + TEST instruction pair -- which also clobbers an extra register -- for our typical modify_and_test() functions. Because asm goto doesn't allow output fields it has to include an unconditional memory clobber when it changes a memory variable to force a reload. Luckily all atomic ops already imply a compiler barrier to go along with their memory barrier semantics. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-0mtn9siwbeo1d33bap1422se@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 29 +++++------------------------ arch/x86/include/asm/atomic64_64.h | 28 ++++------------------------ arch/x86/include/asm/bitops.h | 24 ++++-------------------- arch/x86/include/asm/local.h | 28 ++++------------------------ arch/x86/include/asm/rmwcc.h | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 92 deletions(-) create mode 100644 arch/x86/include/asm/rmwcc.h (limited to 'arch') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 722aa3b04624..da31c8b8a92d 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * Atomic operations that C can't guarantee us.
Useful for @@ -76,12 +77,7 @@ static inline void atomic_sub(int i, atomic_t *v) */ static inline int atomic_sub_and_test(int i, atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" - : "+m" (v->counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e"); } /** @@ -118,12 +114,7 @@ static inline void atomic_dec(atomic_t *v) */ static inline int atomic_dec_and_test(atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "decl %0; sete %1" - : "+m" (v->counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } /** @@ -136,12 +127,7 @@ static inline int atomic_dec_and_test(atomic_t *v) */ static inline int atomic_inc_and_test(atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "incl %0; sete %1" - : "+m" (v->counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } /** @@ -155,12 +141,7 @@ static inline int atomic_inc_and_test(atomic_t *v) */ static inline int atomic_add_negative(int i, atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" - : "+m" (v->counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 0e1cbfc8ee06..3f065c985aee 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,12 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline int atomic64_sub_and_test(long i, atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "er" (i), "m" (v->counter) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e"); } /** @@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64_t *v) */ static inline int atomic64_dec_and_test(atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "decq %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); } /** @@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v) */ static inline int atomic64_inc_and_test(atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "incq %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); } /** @@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) */ static inline int atomic64_add_negative(long i, atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" - : "=m" (v->counter), "=qm" (c) - : "er" (i), "m" (v->counter) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 41639ce8fd63..6d76d0935989 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -14,6 +14,7 @@ #include #include +#include #if BITS_PER_LONG == 32 # define _BITOPS_LONG_SHIFT 5 @@ -204,12 +205,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; - - asm volatile(LOCK_PREFIX "bts %2,%1\n\t" - "sbb %0,%0" : "=r" 
(oldbit), ADDR : "Ir" (nr) : "memory"); - - return oldbit; + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c"); } /** @@ -255,13 +251,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; - - asm volatile(LOCK_PREFIX "btr %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - - return oldbit; + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c"); } /** @@ -314,13 +304,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; - - asm volatile(LOCK_PREFIX "btc %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - - return oldbit; + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c"); } static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 2d89e3980cbd..5b23e605e707 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,12 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline int local_sub_and_test(long i, local_t *l) { - unsigned char c; - - asm volatile(_ASM_SUB "%2,%0; sete %1" - : "+m" (l->a.counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e"); } /** @@ -70,12 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l) */ static inline int local_dec_and_test(local_t *l) { - unsigned char c; - - asm volatile(_ASM_DEC "%0; sete %1" - : "+m" (l->a.counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); } /** @@ -88,12 +78,7 @@ static inline int local_dec_and_test(local_t *l) */ static inline int local_inc_and_test(local_t *l) { - unsigned char c; - - asm volatile(_ASM_INC "%0; sete %1" - : "+m" (l->a.counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); } /** @@ -107,12 +92,7 @@ static inline int local_inc_and_test(local_t *l) */ static inline int local_add_negative(long i, local_t *l) { - unsigned char c; - - asm volatile(_ASM_ADD "%2,%0; sets %1" - : "+m" (l->a.counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h new file mode 100644 index 000000000000..735f1849795f --- /dev/null +++ b/arch/x86/include/asm/rmwcc.h @@ -0,0 +1,41 @@ +#ifndef _ASM_X86_RMWcc +#define _ASM_X86_RMWcc + +#ifdef CC_HAVE_ASM_GOTO + +#define __GEN_RMWcc(fullop, var, cc, ...) \ +do { \ + asm volatile goto (fullop "; j" cc " %l[cc_label]" \ + : : "m" (var), ## __VA_ARGS__ \ + : "memory" : cc_label); \ + return 0; \ +cc_label: \ + return 1; \ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ + __GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ + __GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val)) + +#else /* !CC_HAVE_ASM_GOTO */ + +#define __GEN_RMWcc(fullop, var, cc, ...) 
\ +do { \ + char c; \ + asm volatile (fullop "; set" cc " %1" \ + : "+m" (var), "=qm" (c) \ + : __VA_ARGS__ : "memory"); \ + return c != 0; \ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ + __GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ + __GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val)) + +#endif /* CC_HAVE_ASM_GOTO */ + +#endif /* _ASM_X86_RMWcc */ -- cgit v1.2.3 From ea8117478918a4734586d35ff530721b682425be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Sep 2013 12:43:13 +0200 Subject: sched, idle: Fix the idle polling state logic Mike reported that commit 7d1a9417 ("x86: Use generic idle loop") regressed several workloads and caused excessive reschedule interrupts. The patch in question failed to notice that the x86 code had an inverted sense of the polling state versus the new generic code (x86: default polling, generic: default !polling). Fix the two prominent x86 mwait based idle drivers and introduce a few new generic polling helpers (fixing the wrong smp_mb__after_clear_bit usage). Also switch the idle routines to using tif_need_resched() which is an immediate TIF_NEED_RESCHED test as opposed to need_resched which will end up being slightly different. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: lenb@kernel.org Cc: tglx@linutronix.de Link: http://lkml.kernel.org/n/tip-nc03imb0etuefmzybzj7sprf@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 6 ++-- drivers/acpi/processor_idle.c | 46 ++++++------------------- drivers/idle/intel_idle.c | 2 +- include/linux/sched.h | 78 +++++++++++++++++++++++++++++++++++++++---- include/linux/thread_info.h | 2 ++ kernel/cpu/idle.c | 9 +++-- 6 files changed, 91 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c83516be1052..3fb8d95ab8b5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -391,9 +391,9 @@ static void amd_e400_idle(void) * The switch back from broadcast mode needs to be * called with interrupts disabled. 
*/ - local_irq_disable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); - local_irq_enable(); + local_irq_disable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + local_irq_enable(); } else default_idle(); } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f98dd00b51a9..c7414a545a4f 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -119,17 +119,10 @@ static struct dmi_system_id processor_power_dmi_table[] = { */ static void acpi_safe_halt(void) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - if (!need_resched()) { + if (!tif_need_resched()) { safe_halt(); local_irq_disable(); } - current_thread_info()->status |= TS_POLLING; } #ifdef ARCH_APICTIMER_STOPS_ON_C3 @@ -737,6 +730,11 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, if (unlikely(!pr)) return -EINVAL; + if (cx->entry_method == ACPI_CSTATE_FFH) { + if (current_set_polling_and_test()) + return -EINVAL; + } + lapic_timer_state_broadcast(pr, cx, 1); acpi_idle_do_entry(cx); @@ -790,18 +788,9 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, if (unlikely(!pr)) return -EINVAL; - if (cx->entry_method != ACPI_CSTATE_FFH) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we test - * NEED_RESCHED: - */ - smp_mb(); - - if (unlikely(need_resched())) { - current_thread_info()->status |= TS_POLLING; + if (cx->entry_method == ACPI_CSTATE_FFH) { + if (current_set_polling_and_test()) return -EINVAL; - } } /* @@ -819,9 +808,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, sched_clock_idle_wakeup_event(0); - if (cx->entry_method != ACPI_CSTATE_FFH) - current_thread_info()->status |= TS_POLLING; - lapic_timer_state_broadcast(pr, cx, 0); return index; } @@ -858,18 +844,9 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, } } - if (cx->entry_method != ACPI_CSTATE_FFH) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we test - * NEED_RESCHED: - */ - smp_mb(); - - if (unlikely(need_resched())) { - current_thread_info()->status |= TS_POLLING; + if (cx->entry_method == ACPI_CSTATE_FFH) { + if (current_set_polling_and_test()) return -EINVAL; - } } acpi_unlazy_tlb(smp_processor_id()); @@ -915,9 +892,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, sched_clock_idle_wakeup_event(0); - if (cx->entry_method != ACPI_CSTATE_FFH) - current_thread_info()->status |= TS_POLLING; - lapic_timer_state_broadcast(pr, cx, 0); return index; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index fa6964d8681a..f116d664b473 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -359,7 +359,7 @@ static int intel_idle(struct cpuidle_device *dev, if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); - if (!need_resched()) { + if (!current_set_polling_and_test()) { __monitor((void *)&current_thread_info()->flags, 0, 0); smp_mb(); diff --git a/include/linux/sched.h b/include/linux/sched.h index b5344de1658b..e783ec52295a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2479,34 +2479,98 @@ static inline int tsk_is_polling(struct task_struct *p) { return task_thread_info(p)->status & TS_POLLING; } -static inline void current_set_polling(void) +static inline void __current_set_polling(void) {
current_thread_info()->status |= TS_POLLING; } -static inline void current_clr_polling(void) +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + */ + smp_mb(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) { current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + */ + smp_mb(); + + return unlikely(tif_need_resched()); } #elif defined(TIF_POLLING_NRFLAG) static inline int tsk_is_polling(struct task_struct *p) { return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); } -static inline void current_set_polling(void) + +static inline void __current_set_polling(void) { set_thread_flag(TIF_POLLING_NRFLAG); } -static inline void current_clr_polling(void) +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + * + * XXX: assumes set/clear bit are identical barrier wise. + */ + smp_mb__after_clear_bit(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) { clear_thread_flag(TIF_POLLING_NRFLAG); } + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_task() + */ + smp_mb__after_clear_bit(); + + return unlikely(tif_need_resched()); +} + #else static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void current_set_polling(void) { } -static inline void current_clr_polling(void) { } +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} #endif /* diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index a629e4b23217..fddbe2023a5d 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -118,6 +118,8 @@ static inline __deprecated void set_need_resched(void) */ } +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) + #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK /* * An arch can define its own version of set_restore_sigmask() to get the diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c index e695c0a0bcb5..c261409500e4 100644 --- a/kernel/cpu/idle.c +++ b/kernel/cpu/idle.c @@ -44,7 +44,7 @@ static inline int cpu_idle_poll(void) rcu_idle_enter(); trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); - while (!need_resched()) + while (!tif_need_resched()) cpu_relax(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); rcu_idle_exit(); @@ -92,8 +92,7 @@ static void cpu_idle_loop(void) if (cpu_idle_force_poll || tick_check_broadcast_expired()) { cpu_idle_poll(); } else { - current_clr_polling(); - if (!need_resched()) { + if (!current_clr_polling_and_test()) { stop_critical_timings(); rcu_idle_enter(); arch_cpu_idle(); @@ -103,7 +102,7 @@ static void cpu_idle_loop(void) } else { 
local_irq_enable(); } - current_set_polling(); + __current_set_polling(); } arch_cpu_idle_exit(); } @@ -129,7 +128,7 @@ void cpu_startup_entry(enum cpuhp_state state) */ boot_init_stack_canary(); #endif - current_set_polling(); + __current_set_polling(); arch_cpu_idle_prepare(); cpu_idle_loop(); } -- cgit v1.2.3 From a787870924dbd6f321661e06d4ec1c7a408c9ccf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:55:40 +0200 Subject: sched, arch: Create asm/preempt.h In order to prepare to per-arch implementations of preempt_count move the required bits into an asm-generic header and use this for all archs. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-h5j0c1r3e3fk015m30h8f1zx@git.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/h8300/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/preempt.h | 54 ++++++++++++++++++++++++++++++++++++++ include/linux/preempt.h | 49 +--------------------------------- 32 files changed, 85 insertions(+), 48 deletions(-) create mode 100644 include/asm-generic/preempt.h (limited to 'arch') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index a6e85f448c1c..f01fb505ad52 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index d8dd660898b9..5943f7f9d325 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -46,3 +46,4 @@ generic-y += ucontext.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index d3db39860b9c..4e6838d4ddf6 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -33,3 +33,4 @@ generic-y += timex.h generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h +generic-y += preempt.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 79a642d199f2..519f89f5b6a3 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -50,3 +50,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index d22af851f3f6..b946080ee8bb 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += 
clkdev.h generic-y += exec.h generic-y += trace_clock.h generic-y += param.h +generic-y += preempt.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 127826f8a375..f2b43474b0e2 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -44,3 +44,4 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index e49f918531ad..fc0b3c356027 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -56,3 +56,4 @@ generic-y += ucontext.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index c8325455520e..b06caf649a95 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -11,3 +11,4 @@ generic-y += module.h generic-y += trace_clock.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index c5d767028306..74742dc6a3da 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 8ada3cf0c98d..7e0e7213a481 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -6,3 +6,4 @@ generic-y += mmu.h generic-y += module.h generic-y += trace_clock.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 1da17caac23c..67c3450309b7 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -53,3 +53,4 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index a3456f34f672..f93ee087e8fe 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -3,4 +3,5 @@ generic-y += clkdev.h generic-y += exec.h generic-y += kvm_para.h generic-y += trace_clock.h +generic-y += preempt.h generic-y += vtime.h \ No newline at end of file diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index bebdc36ebb0a..2b58c5f0bc38 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += module.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 09d77a862da3..a5d27f272a59 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -31,3 +31,4 @@ generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 6ae0ccb632cb..84d0c1d6b9b3 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -52,3 +52,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index d3c51a6a601d..ce0bbf8f5640 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h 
generic-y += trace_clock.h generic-y += syscalls.h +generic-y += preempt.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 454ddf9bb76f..1acbb8b77a71 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -11,5 +11,6 @@ generic-y += sections.h generic-y += segment.h generic-y += serial.h generic-y += trace_clock.h +generic-y += preempt.h generic-y += ucontext.h generic-y += xor.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index c5d767028306..74742dc6a3da 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 195653e851da..78405625e799 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -67,3 +67,4 @@ generic-y += ucontext.h generic-y += user.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index ff4c9faed546..a603b9ebe54c 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -4,3 +4,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \ div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \ poll.h xor.h clkdev.h exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 704e6f10ae80..d8f9d2f18a23 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -2,4 +2,5 @@ generic-y += clkdev.h generic-y += rwsem.h generic-y += trace_clock.h +generic-y += preempt.h generic-y += vtime.h \ No newline at end of file diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index f313f9cbcf44..7a5288f3479a 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index e1c7bb999b06..f3414ade77a3 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,3 +4,4 @@ header-y += generic-y += clkdev.h generic-y += trace_clock.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 280bea9e5e2b..231efbb68108 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -34,3 +34,4 @@ generic-y += termios.h generic-y += trace_clock.h generic-y += ucontext.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 7e4a97fbded4..bf390667657a 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -16,3 +16,4 @@ generic-y += serial.h generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h +generic-y += preempt.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 664d6ad23f80..22f3bd147fa7 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -38,3 +38,4 @@ generic-y += termios.h generic-y += trace_clock.h generic-y += types.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index b30f34a79882..fdde187e6087 100644 --- a/arch/um/include/asm/Kbuild +++ 
b/arch/um/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h generic-y += switch_to.h clkdev.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 89d8b6c4e39a..00045cbe5c63 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -60,3 +60,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 7f669853317a..eca20286a91c 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,3 +5,4 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += preempt.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 1b982641ec35..228d6aee3a16 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -28,3 +28,4 @@ generic-y += termios.h generic-y += topology.h generic-y += trace_clock.h generic-y += xor.h +generic-y += preempt.h diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h new file mode 100644 index 000000000000..a1fc6590a743 --- /dev/null +++ b/include/asm-generic/preempt.h @@ -0,0 +1,54 @@ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ +static __always_inline int preempt_count(void) +{ + return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline int *preempt_count_ptr(void) +{ + return &current_thread_info()->preempt_count; +} + +/* + * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the + * alternative is loosing a reschedule. Better schedule too often -- also this + * should be a very rare operation. + */ +static __always_inline void preempt_count_set(int pc) +{ + *preempt_count_ptr() = pc; +} + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + *preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void clear_preempt_need_resched(void) +{ + *preempt_count_ptr() |= PREEMPT_NEED_RESCHED; +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED); +} + +#endif /* __ASM_PREEMPT_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 92e341853e4b..df8e245e8729 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -6,7 +6,6 @@ * preempt_count (used for kernel preemption, interrupt count, etc.) */ -#include #include #include @@ -16,53 +15,7 @@ */ #define PREEMPT_NEED_RESCHED 0x80000000 -/* - * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users - * that think a non-zero value indicates we cannot preempt.
- */ -static __always_inline int preempt_count(void) -{ - return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED; -} - -static __always_inline int *preempt_count_ptr(void) -{ - return &current_thread_info()->preempt_count; -} - -/* - * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the - * alternative is loosing a reschedule. Better schedule too often -- also this - * should be a very rare operation. - */ -static __always_inline void preempt_count_set(int pc) -{ - *preempt_count_ptr() = pc; -} - -/* - * We fold the NEED_RESCHED bit into the preempt count such that - * preempt_enable() can decrement and test for needing to reschedule with a - * single instruction. - * - * We invert the actual bit, so that when the decrement hits 0 we know we both - * need to resched (the bit is cleared) and can resched (no preempt count). - */ - -static __always_inline void set_preempt_need_resched(void) -{ - *preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED; -} - -static __always_inline void clear_preempt_need_resched(void) -{ - *preempt_count_ptr() |= PREEMPT_NEED_RESCHED; -} - -static __always_inline bool test_preempt_need_resched(void) -{ - return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED); -} +#include #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void add_preempt_count(int val); -- cgit v1.2.3 From bdb43806589096ac4272fe1307e789846ac08d7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Sep 2013 12:15:23 +0200 Subject: sched: Extract the basic add/sub preempt_count modifiers Rewrite the preempt_count macros in order to extract the 3 basic preempt_count value modifiers: __preempt_count_add() __preempt_count_sub() and the new: __preempt_count_dec_and_test() And since we're at it anyway, replace the unconventional $op_preempt_count names with the more conventional preempt_count_$op. Since these basic operators are equivalent to the previous _notrace() variants, do away with the _notrace() versions.
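For quick reference, the renaming reads directly off the hunks below; the _notrace() variants collapse onto the new double-underscore primitives:

	add_preempt_count(val)           ->  preempt_count_add(val)
	sub_preempt_count(val)           ->  preempt_count_sub(val)
	inc_preempt_count()              ->  preempt_count_inc()
	dec_preempt_count()              ->  preempt_count_dec()
	add_preempt_count_notrace(val)   ->  __preempt_count_add(val)
	sub_preempt_count_notrace(val)   ->  __preempt_count_sub(val)

With CONFIG_PREEMPT, preempt_enable() then becomes a barrier() followed by 'if (unlikely(preempt_count_dec_and_test())) preempt_schedule();', as the linux/preempt.h hunk below shows.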
Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ewbpdbupy9xpsjhg960zwbv8@git.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/mm/init.c | 5 +- arch/x86/kernel/traps.c | 4 +- include/asm-generic/preempt.h | 35 ++++++++++++++ include/linux/hardirq.h | 8 ++-- include/linux/preempt.h | 106 +++++++++++++++++++----------------------- include/linux/sched.h | 5 -- include/linux/uaccess.h | 8 +--- kernel/context_tracking.c | 2 +- kernel/sched/core.c | 29 +++++------- kernel/softirq.c | 14 +++--- 10 files changed, 113 insertions(+), 103 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index e205ef598e97..12156176c7ca 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -124,7 +124,7 @@ void *kmap_coherent(struct page *page, unsigned long addr) BUG_ON(Page_dcache_dirty(page)); - inc_preempt_count(); + pagefault_disable(); idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); #ifdef CONFIG_MIPS_MT_SMTC idx += FIX_N_COLOURS * smp_processor_id() + @@ -193,8 +193,7 @@ void kunmap_coherent(void) write_c0_entryhi(old_ctx); EXIT_CRITICAL(flags); #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } void copy_user_highpage(struct page *to, struct page *from, diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8c8093b146ca..729aa779ff75 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -88,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs) static inline void preempt_conditional_sti(struct pt_regs *regs) { - inc_preempt_count(); + preempt_count_inc(); if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -103,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - dec_preempt_count(); + preempt_count_dec(); } static int __kprobes diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 8100b1ec1715..82d958fc3823 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -65,4 +65,39 @@ static __always_inline bool test_preempt_need_resched(void) return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED); } +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + *preempt_count_ptr() += val; +} + +static __always_inline void __preempt_count_sub(int val) +{ + *preempt_count_ptr() -= val; +} + +static __always_inline bool __preempt_count_dec_and_test(void) +{ + return !--*preempt_count_ptr(); +} + +/* + * Returns true when we need to resched -- even if we can not. + */ +static __always_inline bool need_resched(void) +{ + return unlikely(test_preempt_need_resched()); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). 
+ */ +static __always_inline bool should_resched(void) +{ + return unlikely(!*preempt_count_ptr()); +} + #endif /* __ASM_PREEMPT_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 1e041063b226..d9cf963ac832 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -33,7 +33,7 @@ extern void rcu_nmi_exit(void); #define __irq_enter() \ do { \ account_irq_enter_time(current); \ - add_preempt_count(HARDIRQ_OFFSET); \ + preempt_count_add(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -49,7 +49,7 @@ extern void irq_enter(void); do { \ trace_hardirq_exit(); \ account_irq_exit_time(current); \ - sub_preempt_count(HARDIRQ_OFFSET); \ + preempt_count_sub(HARDIRQ_OFFSET); \ } while (0) /* @@ -62,7 +62,7 @@ extern void irq_exit(void); lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ - add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ } while (0) @@ -72,7 +72,7 @@ extern void irq_exit(void); trace_hardirq_exit(); \ rcu_nmi_exit(); \ BUG_ON(!in_nmi()); \ - sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ + preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ } while (0) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index df8e245e8729..2343d8715299 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -18,97 +18,86 @@ #include #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) - extern void add_preempt_count(int val); - extern void sub_preempt_count(int val); +extern void preempt_count_add(int val); +extern void preempt_count_sub(int val); +#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) #else -# define add_preempt_count(val) do { *preempt_count_ptr() += (val); } while (0) -# define sub_preempt_count(val) do { *preempt_count_ptr() -= (val); } while (0) +#define preempt_count_add(val) __preempt_count_add(val) +#define preempt_count_sub(val) __preempt_count_sub(val) +#define preempt_count_dec_and_test() __preempt_count_dec_and_test() #endif -#define inc_preempt_count() add_preempt_count(1) -#define dec_preempt_count() sub_preempt_count(1) - -#ifdef CONFIG_PREEMPT - -asmlinkage void preempt_schedule(void); - -#define preempt_check_resched() \ -do { \ - if (unlikely(!*preempt_count_ptr())) \ - preempt_schedule(); \ -} while (0) - -#ifdef CONFIG_CONTEXT_TRACKING - -void preempt_schedule_context(void); - -#define preempt_check_resched_context() \ -do { \ - if (unlikely(!*preempt_count_ptr())) \ - preempt_schedule_context(); \ -} while (0) -#else - -#define preempt_check_resched_context() preempt_check_resched() - -#endif /* CONFIG_CONTEXT_TRACKING */ - -#else /* !CONFIG_PREEMPT */ - -#define preempt_check_resched() do { } while (0) -#define preempt_check_resched_context() do { } while (0) - -#endif /* CONFIG_PREEMPT */ +#define __preempt_count_inc() __preempt_count_add(1) +#define __preempt_count_dec() __preempt_count_sub(1) +#define preempt_count_inc() preempt_count_add(1) +#define preempt_count_dec() preempt_count_sub(1) #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ do { \ - inc_preempt_count(); \ + preempt_count_inc(); \ barrier(); \ } while (0) #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ - dec_preempt_count(); \ + preempt_count_dec(); \ } while (0) -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#define preempt_enable_no_resched() sched_preempt_enable_no_resched() +#ifdef CONFIG_PREEMPT 
+asmlinkage void preempt_schedule(void); #define preempt_enable() \ do { \ - preempt_enable_no_resched(); \ - preempt_check_resched(); \ + barrier(); \ + if (unlikely(preempt_count_dec_and_test())) \ + preempt_schedule(); \ } while (0) -/* For debugging and tracer internals only! */ -#define add_preempt_count_notrace(val) \ - do { *preempt_count_ptr() += (val); } while (0) -#define sub_preempt_count_notrace(val) \ - do { *preempt_count_ptr() -= (val); } while (0) -#define inc_preempt_count_notrace() add_preempt_count_notrace(1) -#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) +#define preempt_check_resched() \ +do { \ + if (should_resched()) \ + preempt_schedule(); \ +} while (0) + +#else +#define preempt_enable() preempt_enable_no_resched() +#define preempt_check_resched() do { } while (0) +#endif #define preempt_disable_notrace() \ do { \ - inc_preempt_count_notrace(); \ + __preempt_count_inc(); \ barrier(); \ } while (0) #define preempt_enable_no_resched_notrace() \ do { \ barrier(); \ - dec_preempt_count_notrace(); \ + __preempt_count_dec(); \ } while (0) -/* preempt_check_resched is OK to trace */ +#ifdef CONFIG_PREEMPT + +#ifdef CONFIG_CONTEXT_TRACKING +asmlinkage void preempt_schedule_context(void); +#else +#define preempt_schedule_context() preempt_schedule() +#endif + #define preempt_enable_notrace() \ do { \ - preempt_enable_no_resched_notrace(); \ - preempt_check_resched_context(); \ + barrier(); \ + if (unlikely(__preempt_count_dec_and_test())) \ + preempt_schedule_context(); \ } while (0) +#else +#define preempt_enable_notrace() preempt_enable_no_resched_notrace() +#endif #else /* !CONFIG_PREEMPT_COUNT */ @@ -118,10 +107,11 @@ do { \ * that can cause faults and scheduling migrate into our preempt-protected * region. */ -#define preempt_disable() barrier() +#define preempt_disable() barrier() #define sched_preempt_enable_no_resched() barrier() -#define preempt_enable_no_resched() barrier() -#define preempt_enable() barrier() +#define preempt_enable_no_resched() barrier() +#define preempt_enable() barrier() +#define preempt_check_resched() do { } while (0) #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() diff --git a/include/linux/sched.h b/include/linux/sched.h index 9fa151fb968e..06ac17c7e639 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2409,11 +2409,6 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } -static inline int need_resched(void) -{ - return unlikely(test_preempt_need_resched()); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5ca0951e1855..9d8cf056e661 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -15,7 +15,7 @@ */ static inline void pagefault_disable(void) { - inc_preempt_count(); + preempt_count_inc(); /* * make sure to have issued the store before a pagefault * can hit. @@ -30,11 +30,7 @@ static inline void pagefault_enable(void) * the pagefault handler again. */ barrier(); - dec_preempt_count(); - /* - * make sure we do.. 
- */ - barrier(); + preempt_count_dec(); preempt_check_resched(); } diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 247091bf0587..013161f1c807 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -111,7 +111,7 @@ void context_tracking_user_enter(void) * instead of preempt_schedule() to exit user context if needed before * calling the scheduler. */ -void __sched notrace preempt_schedule_context(void) +asmlinkage void __sched notrace preempt_schedule_context(void) { enum ctx_state prev_ctx; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0ba4e4192390..9c84a9ab1892 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2219,7 +2219,7 @@ notrace unsigned long get_parent_ip(unsigned long addr) #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ defined(CONFIG_PREEMPT_TRACER)) -void __kprobes add_preempt_count(int val) +void __kprobes preempt_count_add(int val) { #ifdef CONFIG_DEBUG_PREEMPT /* @@ -2228,7 +2228,7 @@ void __kprobes add_preempt_count(int val) if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) return; #endif - add_preempt_count_notrace(val); + __preempt_count_add(val); #ifdef CONFIG_DEBUG_PREEMPT /* * Spinlock count overflowing soon? @@ -2239,9 +2239,9 @@ void __kprobes add_preempt_count(int val) if (preempt_count() == val) trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); } -EXPORT_SYMBOL(add_preempt_count); +EXPORT_SYMBOL(preempt_count_add); -void __kprobes sub_preempt_count(int val) +void __kprobes preempt_count_sub(int val) { #ifdef CONFIG_DEBUG_PREEMPT /* @@ -2259,9 +2259,9 @@ void __kprobes sub_preempt_count(int val) if (preempt_count() == val) trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); - sub_preempt_count_notrace(val); + __preempt_count_sub(val); } -EXPORT_SYMBOL(sub_preempt_count); +EXPORT_SYMBOL(preempt_count_sub); #endif @@ -2525,9 +2525,9 @@ asmlinkage void __sched notrace preempt_schedule(void) return; do { - add_preempt_count_notrace(PREEMPT_ACTIVE); + __preempt_count_add(PREEMPT_ACTIVE); __schedule(); - sub_preempt_count_notrace(PREEMPT_ACTIVE); + __preempt_count_sub(PREEMPT_ACTIVE); /* * Check again in case we missed a preemption opportunity @@ -2554,11 +2554,11 @@ asmlinkage void __sched preempt_schedule_irq(void) prev_state = exception_enter(); do { - add_preempt_count(PREEMPT_ACTIVE); + __preempt_count_add(PREEMPT_ACTIVE); local_irq_enable(); __schedule(); local_irq_disable(); - sub_preempt_count(PREEMPT_ACTIVE); + __preempt_count_sub(PREEMPT_ACTIVE); /* * Check again in case we missed a preemption opportunity @@ -3798,16 +3798,11 @@ SYSCALL_DEFINE0(sched_yield) return 0; } -static inline int should_resched(void) -{ - return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); -} - static void __cond_resched(void) { - add_preempt_count(PREEMPT_ACTIVE); + __preempt_count_add(PREEMPT_ACTIVE); __schedule(); - sub_preempt_count(PREEMPT_ACTIVE); + __preempt_count_sub(PREEMPT_ACTIVE); } int __sched _cond_resched(void) diff --git a/kernel/softirq.c b/kernel/softirq.c index a90de70cf1f3..3e88612fc87e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -100,13 +100,13 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt) raw_local_irq_save(flags); /* - * The preempt tracer hooks into add_preempt_count and will break + * The preempt tracer hooks into preempt_count_add and will break * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET * is set and before current->softirq_enabled is cleared. 
* We must manually increment preempt_count here and manually * call the trace_preempt_off later. */ - add_preempt_count_notrace(cnt); + __preempt_count_add(cnt); /* * Were softirqs turned off above: */ @@ -120,7 +120,7 @@ static void __local_bh_disable(unsigned long ip, unsigned int cnt) #else /* !CONFIG_TRACE_IRQFLAGS */ static inline void __local_bh_disable(unsigned long ip, unsigned int cnt) { - add_preempt_count(cnt); + preempt_count_add(cnt); barrier(); } #endif /* CONFIG_TRACE_IRQFLAGS */ @@ -139,7 +139,7 @@ static void __local_bh_enable(unsigned int cnt) if (softirq_count() == cnt) trace_softirqs_on(_RET_IP_); - sub_preempt_count(cnt); + preempt_count_sub(cnt); } /* @@ -169,12 +169,12 @@ static inline void _local_bh_enable_ip(unsigned long ip) * Keep preemption disabled until we are done with * softirq processing: */ - sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1); + preempt_count_sub(SOFTIRQ_DISABLE_OFFSET - 1); if (unlikely(!in_interrupt() && local_softirq_pending())) do_softirq(); - dec_preempt_count(); + preempt_count_dec(); #ifdef CONFIG_TRACE_IRQFLAGS local_irq_enable(); #endif @@ -360,7 +360,7 @@ void irq_exit(void) account_irq_exit_time(current); trace_hardirq_exit(); - sub_preempt_count(HARDIRQ_OFFSET); + preempt_count_sub(HARDIRQ_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -- cgit v1.2.3 From c2daa3bed53a81171cf8c1a36db798e82b91afe8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:51:00 +0200 Subject: sched, x86: Provide a per-cpu preempt_count implementation Convert x86 to use a per-cpu preemption count. The reason for doing so is that accessing per-cpu variables is a lot cheaper than accessing thread_info variables. We still need to save/restore the actual preemption count due to PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the same cache-line as the other hot __switch_to() variables such as current_task. NOTE: this save/restore is required even for !PREEMPT kernels as cond_resched() also relies on preempt_count's PREEMPT_ACTIVE to ignore task_struct::state. Also rename thread_info::preempt_count to ensure nobody is 'accidentally' still poking at it. 
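A rough sketch of what the per-cpu counter buys on the hot path (simplified; the real definition goes through GEN_UNARY_RMWcc() and __percpu_arg() as in the preempt.h hunk below, and the exact per-cpu addressing syntax is glossed over here): __preempt_count_dec_and_test() can become a single memory decrement plus a conditional branch, with no separate thread_info load and no separate TIF_NEED_RESCHED test, because the inverted NEED_RESCHED bit is folded into the count and a result of zero already means both 'need to resched' and 'may resched'.

	/* simplified sketch, not the literal GEN_UNARY_RMWcc()/__percpu_arg() expansion */
	static __always_inline bool __preempt_count_dec_and_test(void)
	{
		asm volatile goto ("decl %%gs:__preempt_count; je %l[cc_label]"
				   : : : "memory" : cc_label);
		return 0;
	cc_label:
		return 1;
	}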
Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-gzn5rfsf8trgjoqx8hyayy3q@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/preempt.h | 98 ++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/thread_info.h | 5 +- arch/x86/kernel/asm-offsets.c | 1 - arch/x86/kernel/cpu/common.c | 5 ++ arch/x86/kernel/entry_32.S | 7 +-- arch/x86/kernel/entry_64.S | 4 +- arch/x86/kernel/irq_32.c | 4 -- arch/x86/kernel/process_32.c | 8 ++++ arch/x86/kernel/process_64.c | 8 ++++ 10 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/preempt.h (limited to 'arch') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index eca20286a91c..7f669853317a 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,4 +5,3 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h -generic-y += preempt.h diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h new file mode 100644 index 000000000000..1309942b95e5 --- /dev/null +++ b/arch/x86/include/asm/preempt.h @@ -0,0 +1,98 @@ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include +#include +#include + +DECLARE_PER_CPU(int, __preempt_count); + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ +static __always_inline int preempt_count(void) +{ + return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void preempt_count_set(int pc) +{ + __this_cpu_write_4(__preempt_count, pc); +} + +/* + * must be macros to avoid header recursion hell + */ +#define task_preempt_count(p) \ + (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED) + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ +} while (0) + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); +} + +static __always_inline void clear_preempt_need_resched(void) +{ + __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); +} + +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + __this_cpu_add_4(__preempt_count, val); +} + +static __always_inline void __preempt_count_sub(int val) +{ + __this_cpu_add_4(__preempt_count, -val); +} + +static __always_inline bool __preempt_count_dec_and_test(void) +{ + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); +} + +/* + * Returns true when we need to resched -- even if we can not. + */ +static __always_inline bool need_resched(void) +{ + return unlikely(test_preempt_need_resched()); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). 
+ */ +static __always_inline bool should_resched(void) +{ + return unlikely(!__this_cpu_read_4(__preempt_count)); +} + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 27811190cbd7..c46a46be1ec6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -28,8 +28,7 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ + int saved_preempt_count; mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; @@ -49,7 +48,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = INIT_PREEMPT_COUNT, \ + .saved_preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 28610822fb3c..9f6b9341950f 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -32,7 +32,6 @@ void common(void) { OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_preempt_count, thread_info, preempt_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2793d1f095a2..5223fe6dec7b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); + DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); /* @@ -1169,6 +1172,8 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0dcb0ceb6a2..fd1bc1b15e6d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -362,12 +362,9 @@ END(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all need_resched: - movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all + cmpl $0,PER_CPU_VAR(__preempt_count) + jnz restore_all testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1b69951a81e2..6a43e7d29fe7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1118,10 +1118,8 @@ retint_signal: /* Returning to kernel space. Check if we need preemption */ /* rcx: threadinfo. interrupts off. */ ENTRY(retint_kernel) - cmpl $0,TI_preempt_count(%rcx) + cmpl $0,PER_CPU_VAR(__preempt_count) jnz retint_restore_args - bt $TIF_NEED_RESCHED,TI_flags(%rcx) - jnc retint_restore_args bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? 
*/ jnc retint_restore_args call preempt_schedule_irq diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4186755f1d7c..3fe066359ac0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; - /* Copy the preempt_count so that the [soft]irq checks work. */ - irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count; - if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -131,7 +128,6 @@ void irq_ctx_init(int cpu) THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); per_cpu(hardirq_ctx, cpu) = irqctx; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 884f98f69354..c2ec1aa6d454 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -291,6 +291,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + /* * Now maybe handle debug registers and/or IO bitmaps */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bb1dc51bab05..45ab4d6fc8a7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(old_rsp, next->usersp); this_cpu_write(current_task, next_p); + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + this_cpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -- cgit v1.2.3 From 1a338ac32ca630f67df25b4a16436cccc314e997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:51:00 +0200 Subject: sched, x86: Optimize the preempt_schedule() call Remove the bloat of the C calling convention out of the preempt_enable() sites by creating an ASM wrapper which allows us to do an asm("call ___preempt_schedule") instead. calling.h bits by Andi Kleen Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-tk7xdi1cvvxewixzke8t8le1@git.kernel.org [ Fixed build error. 
] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calling.h | 50 ++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/preempt.h | 10 ++++++++ arch/x86/kernel/Makefile | 2 ++ arch/x86/kernel/i386_ksyms_32.c | 7 ++++++ arch/x86/kernel/preempt.S | 25 ++++++++++++++++++++ arch/x86/kernel/x8664_ksyms_64.c | 7 ++++++ include/asm-generic/preempt.h | 10 ++++++++ include/linux/preempt.h | 13 ++++------- 8 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 arch/x86/kernel/preempt.S (limited to 'arch') diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 0fa675033912..cb4c73bfeb48 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with #include +#ifdef CONFIG_X86_64 + /* * 64-bit system call stack frame layout defines and helpers, * for assembly code: @@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with .macro icebp .byte 0xf1 .endm + +#else /* CONFIG_X86_64 */ + +/* + * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These + * are different from the entry_32.S versions in not changing the segment + * registers. So only suitable for in kernel use, not when transitioning + * from or to user space. The resulting stack frame is not a standard + * pt_regs frame. The main use case is calling C code from assembler + * when all the registers need to be preserved. + */ + + .macro SAVE_ALL + pushl_cfi %eax + CFI_REL_OFFSET eax, 0 + pushl_cfi %ebp + CFI_REL_OFFSET ebp, 0 + pushl_cfi %edi + CFI_REL_OFFSET edi, 0 + pushl_cfi %esi + CFI_REL_OFFSET esi, 0 + pushl_cfi %edx + CFI_REL_OFFSET edx, 0 + pushl_cfi %ecx + CFI_REL_OFFSET ecx, 0 + pushl_cfi %ebx + CFI_REL_OFFSET ebx, 0 + .endm + + .macro RESTORE_ALL + popl_cfi %ebx + CFI_RESTORE ebx + popl_cfi %ecx + CFI_RESTORE ecx + popl_cfi %edx + CFI_RESTORE edx + popl_cfi %esi + CFI_RESTORE esi + popl_cfi %edi + CFI_RESTORE edi + popl_cfi %ebp + CFI_RESTORE ebp + popl_cfi %eax + CFI_RESTORE eax + .endm + +#endif /* CONFIG_X86_64 */ + diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 1309942b95e5..1de41690ff99 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -95,4 +95,14 @@ static __always_inline bool should_resched(void) return unlikely(!__this_cpu_read_4(__preempt_count)); } +#ifdef CONFIG_PREEMPT + extern asmlinkage void ___preempt_schedule(void); +# define __preempt_schedule() asm ("call ___preempt_schedule") + extern asmlinkage void preempt_schedule(void); +# ifdef CONFIG_CONTEXT_TRACKING + extern asmlinkage void ___preempt_schedule_context(void); +# define __preempt_schedule_context() asm ("call ___preempt_schedule_context") +# endif +#endif + #endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a5408b965c9d..9b0a34e2cd79 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,6 +36,8 @@ obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-$(CONFIG_PREEMPT) += preempt.o + obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 0fa69127209a..05fd74f537d6 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(___preempt_schedule); 
+#ifdef CONFIG_CONTEXT_TRACKING +EXPORT_SYMBOL(___preempt_schedule_context); +#endif +#endif diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S new file mode 100644 index 000000000000..ca7f0d58a87d --- /dev/null +++ b/arch/x86/kernel/preempt.S @@ -0,0 +1,25 @@ + +#include +#include +#include +#include + +ENTRY(___preempt_schedule) + CFI_STARTPROC + SAVE_ALL + call preempt_schedule + RESTORE_ALL + ret + CFI_ENDPROC + +#ifdef CONFIG_CONTEXT_TRACKING + +ENTRY(___preempt_schedule_context) + CFI_STARTPROC + SAVE_ALL + call preempt_schedule_context + RESTORE_ALL + ret + CFI_ENDPROC + +#endif diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index b014d9414d08..040681928e9d 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page); #ifndef CONFIG_PARAVIRT EXPORT_SYMBOL(native_load_gs_index); #endif + +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(___preempt_schedule); +#ifdef CONFIG_CONTEXT_TRACKING +EXPORT_SYMBOL(___preempt_schedule_context); +#endif +#endif diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 82d958fc3823..5dc14ed3791c 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -100,4 +100,14 @@ static __always_inline bool should_resched(void) return unlikely(!*preempt_count_ptr()); } +#ifdef CONFIG_PREEMPT +extern asmlinkage void preempt_schedule(void); +#define __preempt_schedule() preempt_schedule() + +#ifdef CONFIG_CONTEXT_TRACKING +extern asmlinkage void preempt_schedule_context(void); +#define __preempt_schedule_context() preempt_schedule_context() +#endif +#endif /* CONFIG_PREEMPT */ + #endif /* __ASM_PREEMPT_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 2343d8715299..a3d9dc8c2c00 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -50,18 +50,17 @@ do { \ #define preempt_enable_no_resched() sched_preempt_enable_no_resched() #ifdef CONFIG_PREEMPT -asmlinkage void preempt_schedule(void); #define preempt_enable() \ do { \ barrier(); \ if (unlikely(preempt_count_dec_and_test())) \ - preempt_schedule(); \ + __preempt_schedule(); \ } while (0) #define preempt_check_resched() \ do { \ if (should_resched()) \ - preempt_schedule(); \ + __preempt_schedule(); \ } while (0) #else @@ -83,17 +82,15 @@ do { \ #ifdef CONFIG_PREEMPT -#ifdef CONFIG_CONTEXT_TRACKING -asmlinkage void preempt_schedule_context(void); -#else -#define preempt_schedule_context() preempt_schedule() +#ifndef CONFIG_CONTEXT_TRACKING +#define __preempt_schedule_context() __preempt_schedule() #endif #define preempt_enable_notrace() \ do { \ barrier(); \ if (unlikely(__preempt_count_dec_and_test())) \ - preempt_schedule_context(); \ + __preempt_schedule_context(); \ } while (0) #else #define preempt_enable_notrace() preempt_enable_no_resched_notrace() -- cgit v1.2.3 From 75f93fed50c2abadbab6ef546b265f51ca975b27 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Sep 2013 17:30:03 +0200 Subject: sched: Revert need_resched() to look at TIF_NEED_RESCHED Yuanhan reported a serious throughput regression in his pigz benchmark. Using the ftrace patch I found that several idle paths need more TLC before we can switch the generic need_resched() over to preempt_need_resched. The preemption paths benefit most from preempt_need_resched and do indeed use it; all other need_resched() users don't really care that much so reverting need_resched() back to tif_need_resched() is the simple and safe solution. 
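One way to picture why the idle and polling paths are easier to keep on the thread_info flag is the small user-space model below. It is a sketch under invented names, not kernel code: a flag stored in the sleeping task's own flags word is something a waker running elsewhere can set and a polling loop can watch, which is the property the TIF-based need_resched() preserves, while the preemption fast paths only ever look at their own CPU's folded counter.

/*
 * User-space sketch only: demo_tif_flags stands in for a task's
 * thread_info->flags word and bit 0 for TIF_NEED_RESCHED. The point is
 * merely that another thread ("remote CPU") can set the flag and a
 * polling loop keyed on it wakes up. Build with: gcc -pthread
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint demo_tif_flags;

static int demo_need_resched(void)             /* the reverted, TIF-based test */
{
	return atomic_load(&demo_tif_flags) & 1u;
}

static void *demo_remote_waker(void *arg)
{
	(void)arg;
	atomic_fetch_or(&demo_tif_flags, 1u);   /* "remote CPU" requests a resched */
	return NULL;
}

int main(void)
{
	pthread_t waker;

	pthread_create(&waker, NULL, demo_remote_waker, NULL);
	while (!demo_need_resched())            /* polling "idle" loop */
		;
	pthread_join(waker, NULL);
	puts("woken via the task-visible flag");
	return 0;
}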
Reported-by: Yuanhan Liu Signed-off-by: Peter Zijlstra Cc: Fengguang Wu Cc: Huang Ying Cc: lkp@linux.intel.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20130927153003.GF15690@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 8 -------- include/asm-generic/preempt.h | 8 -------- include/linux/sched.h | 5 +++++ 3 files changed, 5 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 1de41690ff99..8729723636fd 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -79,14 +79,6 @@ static __always_inline bool __preempt_count_dec_and_test(void) GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); } -/* - * Returns true when we need to resched -- even if we can not. - */ -static __always_inline bool need_resched(void) -{ - return unlikely(test_preempt_need_resched()); -} - /* * Returns true when we need to resched and can (barring IRQ state). */ diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index 5dc14ed3791c..ddf2b420ac8f 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -84,14 +84,6 @@ static __always_inline bool __preempt_count_dec_and_test(void) return !--*preempt_count_ptr(); } -/* - * Returns true when we need to resched -- even if we can not. - */ -static __always_inline bool need_resched(void) -{ - return unlikely(test_preempt_need_resched()); -} - /* * Returns true when we need to resched and can (barring IRQ state). */ diff --git a/include/linux/sched.h b/include/linux/sched.h index b09798b672f3..2ac5285db434 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2577,6 +2577,11 @@ static inline bool __must_check current_clr_polling_and_test(void) } #endif +static __always_inline bool need_resched(void) +{ + return unlikely(tif_need_resched()); +} + /* * Thread group CPU time accounting. */ -- cgit v1.2.3 From 35a2af94c7ce7130ca292c68b1d27fcfdb648f6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Oct 2013 11:22:33 +0200 Subject: sched/wait: Make the __wait_event*() interface more friendly Change all __wait_event*() implementations to match the corresponding wait_event*() signature for convenience. In particular this does away with the weird 'ret' logic. Since there are __wait_event*() users this requires we update them too. Reviewed-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131002092529.042563462@infradead.org Signed-off-by: Ingo Molnar --- arch/mips/kernel/rtlx.c | 19 ++++--- include/linux/tty.h | 10 ++-- include/linux/wait.h | 113 +++++++++++++++++++--------------------- net/irda/af_irda.c | 5 +- net/netfilter/ipvs/ip_vs_sync.c | 7 +-- 5 files changed, 73 insertions(+), 81 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index d763f11e35e2..2c12ea1668d1 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -172,8 +172,9 @@ int rtlx_open(int index, int can_sleep) if (rtlx == NULL) { if( (p = vpe_get_shared(tclimit)) == NULL) { if (can_sleep) { - __wait_event_interruptible(channel_wqs[index].lx_queue, - (p = vpe_get_shared(tclimit)), ret); + ret = __wait_event_interruptible( + channel_wqs[index].lx_queue, + (p = vpe_get_shared(tclimit))); if (ret) goto out_fail; } else { @@ -263,11 +264,10 @@ unsigned int rtlx_read_poll(int index, int can_sleep) /* data available to read? 
*/ if (chan->lx_read == chan->lx_write) { if (can_sleep) { - int ret = 0; - - __wait_event_interruptible(channel_wqs[index].lx_queue, + int ret = __wait_event_interruptible( + channel_wqs[index].lx_queue, (chan->lx_read != chan->lx_write) || - sp_stopping, ret); + sp_stopping); if (ret) return ret; @@ -440,14 +440,13 @@ static ssize_t file_write(struct file *file, const char __user * buffer, /* any space left... */ if (!rtlx_write_poll(minor)) { - int ret = 0; + int ret; if (file->f_flags & O_NONBLOCK) return -EAGAIN; - __wait_event_interruptible(channel_wqs[minor].rt_queue, - rtlx_write_poll(minor), - ret); + ret = __wait_event_interruptible(channel_wqs[minor].rt_queue, + rtlx_write_poll(minor)); if (ret) return ret; } diff --git a/include/linux/tty.h b/include/linux/tty.h index 6e803291028f..633cac77f9f9 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -672,14 +672,14 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty, #define wait_event_interruptible_tty(tty, wq, condition) \ ({ \ int __ret = 0; \ - if (!(condition)) { \ - __wait_event_interruptible_tty(tty, wq, condition, __ret); \ - } \ + if (!(condition)) \ + __ret = __wait_event_interruptible_tty(tty, wq, \ + condition); \ __ret; \ }) -#define __wait_event_interruptible_tty(tty, wq, condition, ret) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret, \ +#define __wait_event_interruptible_tty(tty, wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ tty_unlock(tty); \ schedule(); \ tty_lock(tty)) diff --git a/include/linux/wait.h b/include/linux/wait.h index c065e8af9749..bd4bd7b479b6 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -179,24 +179,23 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) -#define ___wait_cond_timeout(condition, ret) \ +#define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ - if (__cond && !ret) \ - ret = 1; \ - __cond || !ret; \ + if (__cond && !__ret) \ + __ret = 1; \ + __cond || !__ret; \ }) #define ___wait_signal_pending(state) \ ((state == TASK_INTERRUPTIBLE && signal_pending(current)) || \ (state == TASK_KILLABLE && fatal_signal_pending(current))) -#define ___wait_nop_ret int ret __always_unused - #define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ -do { \ +({ \ __label__ __out; \ DEFINE_WAIT(__wait); \ + long __ret = ret; \ \ for (;;) { \ if (exclusive) \ @@ -208,7 +207,7 @@ do { \ break; \ \ if (___wait_signal_pending(state)) { \ - ret = -ERESTARTSYS; \ + __ret = -ERESTARTSYS; \ if (exclusive) { \ abort_exclusive_wait(&wq, &__wait, \ state, NULL); \ @@ -220,12 +219,12 @@ do { \ cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: ; \ -} while (0) +__out: __ret; \ +}) #define __wait_event(wq, condition) \ - ___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ - ___wait_nop_ret, schedule()) + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + schedule()) /** * wait_event - sleep until a condition gets true @@ -246,10 +245,10 @@ do { \ __wait_event(wq, condition); \ } while (0) -#define __wait_event_timeout(wq, condition, ret) \ - ___wait_event(wq, ___wait_cond_timeout(condition, ret), \ - TASK_UNINTERRUPTIBLE, 0, ret, \ - ret = schedule_timeout(ret)) +#define __wait_event_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) /** * wait_event_timeout - sleep 
until a condition gets true or a timeout elapses @@ -272,12 +271,12 @@ do { \ ({ \ long __ret = timeout; \ if (!(condition)) \ - __wait_event_timeout(wq, condition, __ret); \ + __ret = __wait_event_timeout(wq, condition, timeout); \ __ret; \ }) -#define __wait_event_interruptible(wq, condition, ret) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret, \ +#define __wait_event_interruptible(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule()) /** @@ -299,14 +298,14 @@ do { \ ({ \ int __ret = 0; \ if (!(condition)) \ - __wait_event_interruptible(wq, condition, __ret); \ + __ret = __wait_event_interruptible(wq, condition); \ __ret; \ }) -#define __wait_event_interruptible_timeout(wq, condition, ret) \ - ___wait_event(wq, ___wait_cond_timeout(condition, ret), \ - TASK_INTERRUPTIBLE, 0, ret, \ - ret = schedule_timeout(ret)) +#define __wait_event_interruptible_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) /** * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses @@ -330,7 +329,8 @@ do { \ ({ \ long __ret = timeout; \ if (!(condition)) \ - __wait_event_interruptible_timeout(wq, condition, __ret); \ + __ret = __wait_event_interruptible_timeout(wq, \ + condition, timeout); \ __ret; \ }) @@ -347,7 +347,7 @@ do { \ current->timer_slack_ns, \ HRTIMER_MODE_REL); \ \ - ___wait_event(wq, condition, state, 0, __ret, \ + __ret = ___wait_event(wq, condition, state, 0, 0, \ if (!__t.task) { \ __ret = -ETIME; \ break; \ @@ -409,15 +409,15 @@ do { \ __ret; \ }) -#define __wait_event_interruptible_exclusive(wq, condition, ret) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, ret, \ +#define __wait_event_interruptible_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ schedule()) #define wait_event_interruptible_exclusive(wq, condition) \ ({ \ int __ret = 0; \ if (!(condition)) \ - __wait_event_interruptible_exclusive(wq, condition, __ret);\ + __ret = __wait_event_interruptible_exclusive(wq, condition);\ __ret; \ }) @@ -570,8 +570,8 @@ do { \ -#define __wait_event_killable(wq, condition, ret) \ - ___wait_event(wq, condition, TASK_KILLABLE, 0, ret, schedule()) +#define __wait_event_killable(wq, condition) \ + ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule()) /** * wait_event_killable - sleep until a condition gets true @@ -592,18 +592,17 @@ do { \ ({ \ int __ret = 0; \ if (!(condition)) \ - __wait_event_killable(wq, condition, __ret); \ + __ret = __wait_event_killable(wq, condition); \ __ret; \ }) #define __wait_event_lock_irq(wq, condition, lock, cmd) \ - ___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ - ___wait_nop_ret, \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock)) + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock)) /** * wait_event_lock_irq_cmd - sleep until a condition gets true. 
The @@ -663,11 +662,11 @@ do { \ } while (0) -#define __wait_event_interruptible_lock_irq(wq, condition, lock, ret, cmd) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, ret, \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ +#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ spin_lock_irq(&lock)) /** @@ -698,10 +697,9 @@ do { \ #define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ ({ \ int __ret = 0; \ - \ if (!(condition)) \ - __wait_event_interruptible_lock_irq(wq, condition, \ - lock, __ret, cmd); \ + __ret = __wait_event_interruptible_lock_irq(wq, \ + condition, lock, cmd); \ __ret; \ }) @@ -730,18 +728,18 @@ do { \ #define wait_event_interruptible_lock_irq(wq, condition, lock) \ ({ \ int __ret = 0; \ - \ if (!(condition)) \ - __wait_event_interruptible_lock_irq(wq, condition, \ - lock, __ret, ); \ + __ret = __wait_event_interruptible_lock_irq(wq, \ + condition, lock,) \ __ret; \ }) -#define __wait_event_interruptible_lock_irq_timeout(wq, condition, lock, ret) \ - ___wait_event(wq, ___wait_cond_timeout(condition, ret), \ - TASK_INTERRUPTIBLE, 0, ret, \ - spin_unlock_irq(&lock); \ - ret = schedule_timeout(ret); \ +#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ + lock, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, ret, \ + spin_unlock_irq(&lock); \ + __ret = schedule_timeout(__ret); \ spin_lock_irq(&lock)); /** @@ -771,11 +769,10 @@ do { \ #define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ timeout) \ ({ \ - int __ret = timeout; \ - \ + long __ret = timeout; \ if (!(condition)) \ - __wait_event_interruptible_lock_irq_timeout( \ - wq, condition, lock, __ret); \ + __ret = __wait_event_interruptible_lock_irq_timeout( \ + wq, condition, lock, timeout); \ __ret; \ }) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0578d4fa00a9..0f676908d15b 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2563,9 +2563,8 @@ bed: jiffies + msecs_to_jiffies(val)); /* Wait for IR-LMP to call us back */ - __wait_event_interruptible(self->query_wait, - (self->cachedaddr != 0 || self->errno == -ETIME), - err); + err = __wait_event_interruptible(self->query_wait, + (self->cachedaddr != 0 || self->errno == -ETIME)); /* If watchdog is still activated, kill it! 
*/ del_timer(&(self->watchdog)); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f4484719f3e6..f63c2388f38d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1637,12 +1637,9 @@ static int sync_thread_master(void *data) continue; } while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) { - int ret = 0; - - __wait_event_interruptible(*sk_sleep(sk), + int ret = __wait_event_interruptible(*sk_sleep(sk), sock_writeable(sk) || - kthread_should_stop(), - ret); + kthread_should_stop()); if (unlikely(kthread_should_stop())) goto done; } -- cgit v1.2.3 From 88f182dd779b9d350b4774c12d16633a5b60f50c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 10:16:30 +0200 Subject: x86: Apply the asm_volatile_goto() compiler quirk Apply the asm_volatile_goto() compiler quirk to the new rmwcc.h file as well, introduced in: c2daa3bed53a sched, x86: Provide a per-cpu preempt_count implementation Reported-and-tested-by: Fengguang Wu Reported-by: Oleg Nesterov Reported-by: Peter Zijlstra Suggested-by: Jakub Jelinek Reviewed-by: Richard Henderson Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/rmwcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 735f1849795f..1ff990f1de8e 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -5,7 +5,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm volatile goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ -- cgit v1.2.3
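For readers unfamiliar with the construct the quirk wraps: GEN_UNARY_RMWcc emits a read-modify-write instruction followed by a conditional jump to a C label, so __preempt_count_dec_and_test() needs no separate compare. A rough user-space rendition of the same pattern is sketched below; it is x86-only, needs a compiler with asm goto support, and dec_and_test() here is an illustration rather than the kernel macro.

/*
 * Sketch of the "RMW + jump on condition code" idiom behind
 * GEN_UNARY_RMWcc, written as plain user-space code. x86-only and
 * requires asm goto; not the kernel's implementation.
 */
#include <stdbool.h>
#include <stdio.h>

static inline bool dec_and_test(int *v)
{
	__asm__ goto("decl %0; je %l[zero]"
		     : /* no outputs: asm goto */
		     : "m" (*v)
		     : "memory", "cc"
		     : zero);
	return false;
zero:
	return true;
}

int main(void)
{
	int count = 2;

	printf("%d\n", dec_and_test(&count)); /* 2 -> 1: prints 0 */
	printf("%d\n", dec_and_test(&count)); /* 1 -> 0: prints 1 */
	return 0;
}

The asm_volatile_goto() wrapper that this commit applies exists to work around asm goto miscompilation seen with some GCC versions; routing the new rmwcc.h code through it keeps the decrement-and-branch sequence safe on those compilers.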