From b76834bc1b6db0a0923eed85c81b1113021b0612 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 6 Dec 2010 11:16:25 -0600 Subject: kprobes: Use this_cpu_ops Use this_cpu ops in various places to optimize per cpu data access. Cc: Jason Baron Cc: Namhyung Kim Acked-by: H. Peter Anvin Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/kprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e7d1b2e0070d..0c251e9f0507 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -303,12 +303,12 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); /* kprobe_running() will just return the current_kprobe on this CPU */ static inline struct kprobe *kprobe_running(void) { - return (__get_cpu_var(current_kprobe)); + return (__this_cpu_read(current_kprobe)); } static inline void reset_current_kprobe(void) { - __get_cpu_var(current_kprobe) = NULL; + __this_cpu_write(current_kprobe, NULL); } static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) -- cgit v1.2.3 From 909ea96468096b07fbb41aaf69be060d92bd9271 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 8 Dec 2010 16:22:55 +0100 Subject: core: Replace __get_cpu_var with __this_cpu_read if not used for an address. __get_cpu_var() can be replaced with this_cpu_read and will then use a single read instruction with implied address calculation to access the correct per cpu instance. However, the address of a per cpu variable passed to __this_cpu_read() cannot be determined (since it's an implied address conversion through segment prefixes). Therefore apply this only to uses of __get_cpu_var where the address of the variable is not used. Cc: Pekka Enberg Cc: Hugh Dickins Cc: Thomas Gleixner Acked-by: H. 
Peter Anvin Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/asm-generic/irq_regs.h | 8 ++++---- include/linux/elevator.h | 12 +++--------- include/linux/kernel_stat.h | 2 +- kernel/exit.c | 2 +- kernel/fork.c | 2 +- kernel/hrtimer.c | 2 +- kernel/printk.c | 4 ++-- kernel/rcutree.c | 4 ++-- kernel/softirq.c | 42 +++++++++++++++++++++--------------------- kernel/time/tick-common.c | 2 +- kernel/time/tick-oneshot.c | 4 ++-- kernel/watchdog.c | 36 ++++++++++++++++++------------------ mm/slab.c | 6 +++--- 13 files changed, 60 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/irq_regs.h b/include/asm-generic/irq_regs.h index 5ae1d07d4a12..6bf9355fa7eb 100644 --- a/include/asm-generic/irq_regs.h +++ b/include/asm-generic/irq_regs.h @@ -22,15 +22,15 @@ DECLARE_PER_CPU(struct pt_regs *, __irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return __get_cpu_var(__irq_regs); + return __this_cpu_read(__irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) { - struct pt_regs *old_regs, **pp_regs = &__get_cpu_var(__irq_regs); + struct pt_regs *old_regs; - old_regs = *pp_regs; - *pp_regs = new_regs; + old_regs = __this_cpu_read(__irq_regs); + __this_cpu_write(__irq_regs, new_regs); return old_regs; } diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4fd978e7eb83..4d857973d2c9 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -195,15 +195,9 @@ enum { /* * io context count accounting */ -#define elv_ioc_count_mod(name, __val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(name) += (__val); \ - preempt_enable(); \ - } while (0) - -#define elv_ioc_count_inc(name) elv_ioc_count_mod(name, 1) -#define elv_ioc_count_dec(name) elv_ioc_count_mod(name, -1) +#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val) +#define elv_ioc_count_inc(name) this_cpu_inc(name) +#define elv_ioc_count_dec(name) this_cpu_dec(name) #define 
elv_ioc_count_read(name) \ ({ \ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ad54c846911b..44e83ba12b5b 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -47,7 +47,7 @@ extern unsigned long long nr_context_switches(void); #ifndef CONFIG_GENERIC_HARDIRQS #define kstat_irqs_this_cpu(irq) \ - (kstat_this_cpu.irqs[irq]) + (this_cpu_read(kstat.irqs[irq])) struct irq_desc; diff --git a/kernel/exit.c b/kernel/exit.c index 676149a4ac5f..89c74861a3da 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -69,7 +69,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) list_del_rcu(&p->tasks); list_del_init(&p->sibling); - __get_cpu_var(process_counts)--; + __this_cpu_dec(process_counts); } list_del_rcu(&p->thread_group); } diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b7..e05e27de67df 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1282,7 +1282,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); - __get_cpu_var(process_counts)++; + __this_cpu_inc(process_counts); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 72206cf5c6cf..29de5ae4ca95 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -497,7 +497,7 @@ static inline int hrtimer_is_hres_enabled(void) */ static inline int hrtimer_hres_active(void) { - return __get_cpu_var(hrtimer_bases).hres_active; + return __this_cpu_read(hrtimer_bases.hres_active); } /* diff --git a/kernel/printk.c b/kernel/printk.c index 9a2264fc42ca..b032317f9964 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1074,8 +1074,8 @@ static DEFINE_PER_CPU(int, printk_pending); void printk_tick(void) { - if (__get_cpu_var(printk_pending)) { - __get_cpu_var(printk_pending) = 0; + if (__this_cpu_read(printk_pending)) { + 
__this_cpu_write(printk_pending, 0); wake_up_interruptible(&log_wait); } } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ccdc04c47981..aeebf772d6a2 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -367,8 +367,8 @@ void rcu_irq_exit(void) WARN_ON_ONCE(rdtp->dynticks & 0x1); /* If the interrupt queued a callback, get out of dyntick mode. */ - if (__get_cpu_var(rcu_sched_data).nxtlist || - __get_cpu_var(rcu_bh_data).nxtlist) + if (__this_cpu_read(rcu_sched_data.nxtlist) || + __this_cpu_read(rcu_bh_data.nxtlist)) set_need_resched(); } diff --git a/kernel/softirq.c b/kernel/softirq.c index 18f4be0d5fe0..d0a0dda52c1a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -70,7 +70,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { static void wakeup_softirqd(void) { /* Interrupts are disabled: no need to stop preemption */ - struct task_struct *tsk = __get_cpu_var(ksoftirqd); + struct task_struct *tsk = __this_cpu_read(ksoftirqd); if (tsk && tsk->state != TASK_RUNNING) wake_up_process(tsk); @@ -388,8 +388,8 @@ void __tasklet_schedule(struct tasklet_struct *t) local_irq_save(flags); t->next = NULL; - *__get_cpu_var(tasklet_vec).tail = t; - __get_cpu_var(tasklet_vec).tail = &(t->next); + *__this_cpu_read(tasklet_vec.tail) = t; + __this_cpu_write(tasklet_vec.tail, &(t->next)); raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } @@ -402,8 +402,8 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) local_irq_save(flags); t->next = NULL; - *__get_cpu_var(tasklet_hi_vec).tail = t; - __get_cpu_var(tasklet_hi_vec).tail = &(t->next); + *__this_cpu_read(tasklet_hi_vec.tail) = t; + __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } @@ -414,8 +414,8 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) { BUG_ON(!irqs_disabled()); - t->next = __get_cpu_var(tasklet_hi_vec).head; - __get_cpu_var(tasklet_hi_vec).head = t; + t->next = __this_cpu_read(tasklet_hi_vec.head); + 
__this_cpu_write(tasklet_hi_vec.head, t); __raise_softirq_irqoff(HI_SOFTIRQ); } @@ -426,9 +426,9 @@ static void tasklet_action(struct softirq_action *a) struct tasklet_struct *list; local_irq_disable(); - list = __get_cpu_var(tasklet_vec).head; - __get_cpu_var(tasklet_vec).head = NULL; - __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head; + list = __this_cpu_read(tasklet_vec.head); + __this_cpu_write(tasklet_vec.head, NULL); + __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head); local_irq_enable(); while (list) { @@ -449,8 +449,8 @@ static void tasklet_action(struct softirq_action *a) local_irq_disable(); t->next = NULL; - *__get_cpu_var(tasklet_vec).tail = t; - __get_cpu_var(tasklet_vec).tail = &(t->next); + *__this_cpu_read(tasklet_vec.tail) = t; + __this_cpu_write(tasklet_vec.tail, &(t->next)); __raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_enable(); } @@ -461,9 +461,9 @@ static void tasklet_hi_action(struct softirq_action *a) struct tasklet_struct *list; local_irq_disable(); - list = __get_cpu_var(tasklet_hi_vec).head; - __get_cpu_var(tasklet_hi_vec).head = NULL; - __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head; + list = __this_cpu_read(tasklet_hi_vec.head); + __this_cpu_write(tasklet_hi_vec.head, NULL); + __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head); local_irq_enable(); while (list) { @@ -484,8 +484,8 @@ static void tasklet_hi_action(struct softirq_action *a) local_irq_disable(); t->next = NULL; - *__get_cpu_var(tasklet_hi_vec).tail = t; - __get_cpu_var(tasklet_hi_vec).tail = &(t->next); + *__this_cpu_read(tasklet_hi_vec.tail) = t; + __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); __raise_softirq_irqoff(HI_SOFTIRQ); local_irq_enable(); } @@ -802,16 +802,16 @@ static void takeover_tasklets(unsigned int cpu) /* Find end, append list for that CPU. 
*/ if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { - *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head; - __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail; + *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head; + this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail); per_cpu(tasklet_vec, cpu).head = NULL; per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; } raise_softirq_irqoff(TASKLET_SOFTIRQ); if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) { - *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head; - __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail; + *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head; + __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail); per_cpu(tasklet_hi_vec, cpu).head = NULL; per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b6b898d2eeef..051bc80a0c43 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -49,7 +49,7 @@ struct tick_device *tick_get_device(int cpu) */ int tick_is_oneshot_available(void) { - struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; + struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); } diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index aada0e52680a..5cbc101f908b 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -95,7 +95,7 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, */ int tick_program_event(ktime_t expires, int force) { - struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; + struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); return tick_dev_program_event(dev, expires, 
force); } @@ -167,7 +167,7 @@ int tick_oneshot_mode_active(void) int ret; local_irq_save(flags); - ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT; + ret = __this_cpu_read(tick_cpu_device.mode) == TICKDEV_MODE_ONESHOT; local_irq_restore(flags); return ret; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6e3c41a4024c..8037a86106ed 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -116,12 +116,12 @@ static void __touch_watchdog(void) { int this_cpu = smp_processor_id(); - __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); + __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu)); } void touch_softlockup_watchdog(void) { - __raw_get_cpu_var(watchdog_touch_ts) = 0; + __this_cpu_write(watchdog_touch_ts, 0); } EXPORT_SYMBOL(touch_softlockup_watchdog); @@ -165,12 +165,12 @@ void touch_softlockup_watchdog_sync(void) /* watchdog detector functions */ static int is_hardlockup(void) { - unsigned long hrint = __get_cpu_var(hrtimer_interrupts); + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); - if (__get_cpu_var(hrtimer_interrupts_saved) == hrint) + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) return 1; - __get_cpu_var(hrtimer_interrupts_saved) = hrint; + __this_cpu_write(hrtimer_interrupts_saved, hrint); return 0; } #endif @@ -203,8 +203,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi, /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; - if (__get_cpu_var(watchdog_nmi_touch) == true) { - __get_cpu_var(watchdog_nmi_touch) = false; + if (__this_cpu_read(watchdog_nmi_touch) == true) { + __this_cpu_write(watchdog_nmi_touch, false); return; } @@ -218,7 +218,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi, int this_cpu = smp_processor_id(); /* only print hardlockups once */ - if (__get_cpu_var(hard_watchdog_warn) == true) + if (__this_cpu_read(hard_watchdog_warn) == true) return; if (hardlockup_panic) @@ -226,16 +226,16 @@ static 
void watchdog_overflow_callback(struct perf_event *event, int nmi, else WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); - __get_cpu_var(hard_watchdog_warn) = true; + __this_cpu_write(hard_watchdog_warn, true); return; } - __get_cpu_var(hard_watchdog_warn) = false; + __this_cpu_write(hard_watchdog_warn, false); return; } static void watchdog_interrupt_count(void) { - __get_cpu_var(hrtimer_interrupts)++; + __this_cpu_inc(hrtimer_interrupts); } #else static inline void watchdog_interrupt_count(void) { return; } @@ -244,7 +244,7 @@ static inline void watchdog_interrupt_count(void) { return; } /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { - unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts); + unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); struct pt_regs *regs = get_irq_regs(); int duration; @@ -252,18 +252,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) watchdog_interrupt_count(); /* kick the softlockup detector */ - wake_up_process(__get_cpu_var(softlockup_watchdog)); + wake_up_process(__this_cpu_read(softlockup_watchdog)); /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); if (touch_ts == 0) { - if (unlikely(__get_cpu_var(softlockup_touch_sync))) { + if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically * make sure the scheduler tick is up to date. */ - __get_cpu_var(softlockup_touch_sync) = false; + __this_cpu_write(softlockup_touch_sync, false); sched_clock_tick(); } __touch_watchdog(); @@ -279,7 +279,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) duration = is_softlockup(touch_ts); if (unlikely(duration)) { /* only warn once */ - if (__get_cpu_var(soft_watchdog_warn) == true) + if (__this_cpu_read(soft_watchdog_warn) == true) return HRTIMER_RESTART; printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! 
[%s:%d]\n", @@ -294,9 +294,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) if (softlockup_panic) panic("softlockup: hung tasks"); - __get_cpu_var(soft_watchdog_warn) = true; + __this_cpu_write(soft_watchdog_warn, true); } else - __get_cpu_var(soft_watchdog_warn) = false; + __this_cpu_write(soft_watchdog_warn, false); return HRTIMER_RESTART; } diff --git a/mm/slab.c b/mm/slab.c index b1e40dafbab3..316d75596f3c 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -829,12 +829,12 @@ static void init_reap_node(int cpu) static void next_reap_node(void) { - int node = __get_cpu_var(slab_reap_node); + int node = __this_cpu_read(slab_reap_node); node = next_node(node, node_online_map); if (unlikely(node >= MAX_NUMNODES)) node = first_node(node_online_map); - __get_cpu_var(slab_reap_node) = node; + __this_cpu_write(slab_reap_node, node); } #else @@ -1012,7 +1012,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, */ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) { - int node = __get_cpu_var(slab_reap_node); + int node = __this_cpu_read(slab_reap_node); if (l3->alien) { struct array_cache *ac = l3->alien[node]; -- cgit v1.2.3 From a663ffff1d2e94a7c549a37d08ed9169ce83bdd6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 6 Dec 2010 11:39:59 -0600 Subject: percpu: Generic support for this_cpu_add, sub, dec, inc_return Introduce generic support for this_cpu_add_return etc. The fallback is to realize these operations with simpler __this_cpu_ops. tj: - Reformatted __cpu_size_call_return2() to make it more consistent with its neighbors. - Dropped unnecessary temp variable ret__ from __this_cpu_generic_add_return(). Reviewed-by: Tejun Heo Reviewed-by: Mathieu Desnoyers Acked-by: H. 
Peter Anvin Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 5095b834a6fb..4d593defc47d 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -240,6 +240,21 @@ extern void __bad_size_call_parameter(void); pscr_ret__; \ }) +#define __pcpu_size_call_return2(stem, variable, ...) \ +({ \ + typeof(variable) pscr2_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ + case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ + case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ + case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pscr2_ret__; \ +}) + #define __pcpu_size_call(stem, variable, ...) 
\ do { \ __verify_pcpu_ptr(&(variable)); \ @@ -529,6 +544,62 @@ do { \ # define __this_cpu_xor(pcp, val) __pcpu_size_call(__this_cpu_xor_, (pcp), (val)) #endif +#define _this_cpu_generic_add_return(pcp, val) \ +({ \ + typeof(pcp) ret__; \ + preempt_disable(); \ + __this_cpu_add(pcp, val); \ + ret__ = __this_cpu_read(pcp); \ + preempt_enable(); \ + ret__; \ +}) + +#ifndef this_cpu_add_return +# ifndef this_cpu_add_return_1 +# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_2 +# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_4 +# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_8 +# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#endif + +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) +#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) +#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) + +#define __this_cpu_generic_add_return(pcp, val) \ +({ \ + __this_cpu_add(pcp, val); \ + __this_cpu_read(pcp); \ +}) + +#ifndef __this_cpu_add_return +# ifndef __this_cpu_add_return_1 +# define __this_cpu_add_return_1(pcp, val) __this_cpu_generic_add_return(pcp, val) +# endif +# ifndef __this_cpu_add_return_2 +# define __this_cpu_add_return_2(pcp, val) __this_cpu_generic_add_return(pcp, val) +# endif +# ifndef __this_cpu_add_return_4 +# define __this_cpu_add_return_4(pcp, val) __this_cpu_generic_add_return(pcp, val) +# endif +# ifndef __this_cpu_add_return_8 +# define __this_cpu_add_return_8(pcp, val) __this_cpu_generic_add_return(pcp, val) +# endif +# define __this_cpu_add_return(pcp, val) __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) +#endif + +#define __this_cpu_sub_return(pcp, val) 
this_cpu_add_return(pcp, -(val)) +#define __this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) + /* * IRQ safe versions of the per cpu RMW operations. Note that these operations * are *not* safe against modification of the same variable from another -- cgit v1.2.3 From cfb824349556904b319464139be5c75fce983b0d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 6 Dec 2010 11:40:03 -0600 Subject: highmem: Use this_cpu_xx_return() operations Use this_cpu operations to optimize access primitives for highmem. The main effect is the avoidance of address calculations through the use of a segment prefix. V3->V4 - kmap_atomic_idx: Do not return a value. - Use __this_cpu_dec without HIGHMEM_DEBUG Cc: Peter Zijlstra Cc: Catalin Marinas Acked-by: H. Peter Anvin Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/highmem.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b676c585574e..3a93f73a8acc 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -81,7 +81,8 @@ DECLARE_PER_CPU(int, __kmap_atomic_idx); static inline int kmap_atomic_idx_push(void) { - int idx = __get_cpu_var(__kmap_atomic_idx)++; + int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; + #ifdef CONFIG_DEBUG_HIGHMEM WARN_ON_ONCE(in_irq() && !irqs_disabled()); BUG_ON(idx > KM_TYPE_NR); @@ -91,16 +92,18 @@ static inline int kmap_atomic_idx_push(void) static inline int kmap_atomic_idx(void) { - return __get_cpu_var(__kmap_atomic_idx) - 1; + return __this_cpu_read(__kmap_atomic_idx) - 1; } -static inline int kmap_atomic_idx_pop(void) +static inline void kmap_atomic_idx_pop(void) { - int idx = --__get_cpu_var(__kmap_atomic_idx); #ifdef CONFIG_DEBUG_HIGHMEM + int idx = __this_cpu_dec_return(__kmap_atomic_idx); + BUG_ON(idx < 0); +#else + __this_cpu_dec(__kmap_atomic_idx); #endif - return 
idx; } #endif -- cgit v1.2.3 From 403047754cf690b012369b8fb563b738b88086e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Dec 2010 15:47:04 +0100 Subject: percpu,x86: relocate this_cpu_add_return() and friends - include/linux/percpu.h: this_cpu_add_return() and friends were located next to __this_cpu_add_return(). However, the overall organization is to first group by preemption safeness. Relocate this_cpu_add_return() and friends to preemption-safe area. - arch/x86/include/asm/percpu.h: Relocate percpu_add_return_op() after other more basic operations. Relocate [__]this_cpu_add_return_8() so that they're first grouped by preemption safeness. Signed-off-by: Tejun Heo Cc: Christoph Lameter --- arch/x86/include/asm/percpu.h | 71 +++++++++++++++++++++---------------------- include/linux/percpu.h | 60 ++++++++++++++++++------------------ 2 files changed, 65 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 38f9e965ff96..dd0cd4b6a76f 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -177,39 +177,6 @@ do { \ } \ } while (0) -/* - * Add return operation - */ -#define percpu_add_return_op(var, val) \ -({ \ - typeof(var) paro_ret__ = val; \ - switch (sizeof(var)) { \ - case 1: \ - asm("xaddb %0, "__percpu_arg(1) \ - : "+q" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 2: \ - asm("xaddw %0, "__percpu_arg(1) \ - : "+r" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 4: \ - asm("xaddl %0, "__percpu_arg(1) \ - : "+r" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - case 8: \ - asm("xaddq %0, "__percpu_arg(1) \ - : "+re" (paro_ret__), "+m" (var) \ - : : "memory"); \ - break; \ - default: __bad_percpu_size(); \ - } \ - paro_ret__ += val; \ - paro_ret__; \ -}) - #define percpu_from_op(op, var, constraint) \ ({ \ typeof(var) pfo_ret__; \ @@ -262,6 +229,39 @@ do { \ } \ }) +/* + * Add return operation + */ 
+#define percpu_add_return_op(var, val) \ +({ \ + typeof(var) paro_ret__ = val; \ + switch (sizeof(var)) { \ + case 1: \ + asm("xaddb %0, "__percpu_arg(1) \ + : "+q" (paro_ret__), "+m" (var) \ + : : "memory"); \ + break; \ + case 2: \ + asm("xaddw %0, "__percpu_arg(1) \ + : "+r" (paro_ret__), "+m" (var) \ + : : "memory"); \ + break; \ + case 4: \ + asm("xaddl %0, "__percpu_arg(1) \ + : "+r" (paro_ret__), "+m" (var) \ + : : "memory"); \ + break; \ + case 8: \ + asm("xaddq %0, "__percpu_arg(1) \ + : "+re" (paro_ret__), "+m" (var) \ + : : "memory"); \ + break; \ + default: __bad_percpu_size(); \ + } \ + paro_ret__ += val; \ + paro_ret__; \ +}) + /* * percpu_read() makes gcc load the percpu variable every time it is * accessed while percpu_read_stable() allows the value to be cached. @@ -352,6 +352,7 @@ do { \ #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) +#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) @@ -359,14 +360,12 @@ do { \ #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) - -#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) #endif /* This is not atomic against other CPUs -- CPU preemption 
needs to be off */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4d593defc47d..3484e88d93f8 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -417,6 +417,36 @@ do { \ # define this_cpu_xor(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) #endif +#define _this_cpu_generic_add_return(pcp, val) \ +({ \ + typeof(pcp) ret__; \ + preempt_disable(); \ + __this_cpu_add(pcp, val); \ + ret__ = __this_cpu_read(pcp); \ + preempt_enable(); \ + ret__; \ +}) + +#ifndef this_cpu_add_return +# ifndef this_cpu_add_return_1 +# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_2 +# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_4 +# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_8 +# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#endif + +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) +#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) +#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) + /* * Generic percpu operations that do not require preemption handling. 
* Either we do not care about races or the caller has the @@ -544,36 +574,6 @@ do { \ # define __this_cpu_xor(pcp, val) __pcpu_size_call(__this_cpu_xor_, (pcp), (val)) #endif -#define _this_cpu_generic_add_return(pcp, val) \ -({ \ - typeof(pcp) ret__; \ - preempt_disable(); \ - __this_cpu_add(pcp, val); \ - ret__ = __this_cpu_read(pcp); \ - preempt_enable(); \ - ret__; \ -}) - -#ifndef this_cpu_add_return -# ifndef this_cpu_add_return_1 -# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_2 -# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_4 -# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_8 -# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) -#endif - -#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) -#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) -#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) - #define __this_cpu_generic_add_return(pcp, val) \ ({ \ __this_cpu_add(pcp, val); \ -- cgit v1.2.3 From 2b7124428561c7c3cfa4a58cc4c6feea53f3148e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 18 Dec 2010 15:54:04 +0100 Subject: percpu: Generic this_cpu_cmpxchg() and this_cpu_xchg support Generic code to provide new per cpu atomic features this_cpu_cmpxchg this_cpu_xchg Fallback occurs to functions using interrupts disable/enable to ensure correct per cpu atomicity. Fallback to regular cmpxchg and xchg is not possible since per cpu atomic semantics include the guarantee that the current cpus per cpu data is accessed atomically. 
Use of regular cmpxchg and xchg requires the determination of the address of the per cpu data before regular cmpxchg or xchg which therefore cannot be atomically included in an xchg or cmpxchg without segment override. tj: - Relocated new ops to conform better to the general organization. - This patch contains a trivial comment fix. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu.h | 134 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3484e88d93f8..27c3c6fcfad3 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -447,6 +447,59 @@ do { \ #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) +#define _this_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + preempt_disable(); \ + ret__ = __this_cpu_read(pcp); \ + __this_cpu_write(pcp, nval); \ + preempt_enable(); \ + ret__; \ +}) + +#ifndef this_cpu_xchg +# ifndef this_cpu_xchg_1 +# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_2 +# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_4 +# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_8 +# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# define this_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) +#endif + +#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ typeof(pcp) ret__; \ + preempt_disable(); \ + ret__ = __this_cpu_read(pcp); \ + if (ret__ == (oval)) \ + __this_cpu_write(pcp, nval); \ + preempt_enable(); \ + ret__; \ +}) + +#ifndef this_cpu_cmpxchg +# ifndef this_cpu_cmpxchg_1 +# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif 
+# ifndef this_cpu_cmpxchg_2 +# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_4 +# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_8 +# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# define this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) +#endif + /* * Generic percpu operations that do not require preemption handling. * Either we do not care about races or the caller has the @@ -600,11 +653,61 @@ do { \ #define __this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) +#define __this_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + ret__ = __this_cpu_read(pcp); \ + __this_cpu_write(pcp, nval); \ + ret__; \ +}) + +#ifndef __this_cpu_xchg +# ifndef __this_cpu_xchg_1 +# define __this_cpu_xchg_1(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef __this_cpu_xchg_2 +# define __this_cpu_xchg_2(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef __this_cpu_xchg_4 +# define __this_cpu_xchg_4(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef __this_cpu_xchg_8 +# define __this_cpu_xchg_8(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# endif +# define __this_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(__this_cpu_xchg_, (pcp), nval) +#endif + +#define __this_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + ret__ = __this_cpu_read(pcp); \ + if (ret__ == (oval)) \ + __this_cpu_write(pcp, nval); \ + ret__; \ +}) + +#ifndef __this_cpu_cmpxchg +# ifndef __this_cpu_cmpxchg_1 +# define __this_cpu_cmpxchg_1(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef __this_cpu_cmpxchg_2 +# define __this_cpu_cmpxchg_2(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# 
endif +# ifndef __this_cpu_cmpxchg_4 +# define __this_cpu_cmpxchg_4(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef __this_cpu_cmpxchg_8 +# define __this_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# define __this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval) +#endif + /* * IRQ safe versions of the per cpu RMW operations. Note that these operations * are *not* safe against modification of the same variable from another * processors (which one gets when using regular atomic operations) - . They are guaranteed to be atomic vs. local interrupts and + * They are guaranteed to be atomic vs. local interrupts and * preemption only. */ #define irqsafe_cpu_generic_to_op(pcp, val, op) \ @@ -691,4 +794,33 @@ do { \ # define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (val)) #endif +#define irqsafe_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + local_irq_save(flags); \ + ret__ = __this_cpu_read(pcp); \ + if (ret__ == (oval)) \ + __this_cpu_write(pcp, nval); \ + local_irq_restore(flags); \ + ret__; \ +}) + +#ifndef irqsafe_cpu_cmpxchg +# ifndef irqsafe_cpu_cmpxchg_1 +# define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef irqsafe_cpu_cmpxchg_2 +# define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef irqsafe_cpu_cmpxchg_4 +# define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef irqsafe_cpu_cmpxchg_8 +# define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) irqsafe_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# define irqsafe_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval) +#endif + #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3