From 30ed1a79f5bf271d33e782afee3323582dcc621e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 3 Oct 2009 19:48:22 +0900 Subject: this_cpu: Implement X86 optimized this_cpu operations Basically the existing percpu ops can be used for this_cpu variants that allow operations also on dynamically allocated percpu data. However, we do not pass a reference to a percpu variable in. Instead a dynamically or statically allocated percpu variable is provided. The preempt, non-preempt and irqsafe operations generate the same code. It will always be possible to have the required per cpu atomicity in a single RMW instruction with segment override on x86. 64 bit this_cpu operations are not supported on 32 bit. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 78 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index b65a36defeb7..8b5ec19bdef4 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -153,6 +153,84 @@ do { \ #define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) #define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) +#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) + +#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define 
__this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", 
(pcp), val) +#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +/* + * Per cpu atomic 64 bit operations are only available under 64 bit. + * 32 bit must fall back to generic operations. + */ +#ifdef CONFIG_X86_64 +#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#endif + /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ -- cgit v1.2.3 From 
0f5e4816dbf38ce9488e611ca2296925c1e90d5e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:12 +0900 Subject: percpu: remove some sparse warnings Make the following changes to remove some sparse warnings. * Make DEFINE_PER_CPU_SECTION() declare __pcpu_unique_* before defining it. * Annotate pcpu_extend_area_map() that it is entered with pcpu_lock held, releases it and then reacquires it. * Make percpu related macros use unique nested variable names. * While at it, add pcpu prefix to __size_call[_return]() macros as to-be-implemented sparse annotations will add percpu specific stuff to these macros. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Cc: Rusty Russell --- arch/x86/include/asm/percpu.h | 26 +++++++++++------------ include/linux/percpu-defs.h | 1 + include/linux/percpu.h | 48 +++++++++++++++++++++---------------------- mm/percpu.c | 1 + 4 files changed, 39 insertions(+), 37 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 8b5ec19bdef4..0c44196b78ac 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -74,31 +74,31 @@ extern void __bad_percpu_size(void); #define percpu_to_op(op, var, val) \ do { \ - typedef typeof(var) T__; \ + typedef typeof(var) pto_T__; \ if (0) { \ - T__ tmp__; \ - tmp__ = (val); \ + pto_T__ pto_tmp__; \ + pto_tmp__ = (val); \ } \ switch (sizeof(var)) { \ case 1: \ asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ - : "qi" ((T__)(val))); \ + : "qi" ((pto_T__)(val))); \ break; \ case 2: \ asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)(val))); \ + : "ri" ((pto_T__)(val))); \ break; \ case 4: \ asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)(val))); \ + : "ri" ((pto_T__)(val))); \ break; \ case 8: \ asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ - : "re" ((T__)(val))); \ + : "re" ((pto_T__)(val))); \ break; \ default: __bad_percpu_size(); \ } \ @@ -106,31 +106,31 @@ do { \ #define 
percpu_from_op(op, var, constraint) \ ({ \ - typeof(var) ret__; \ + typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_arg(1)",%0" \ - : "=q" (ret__) \ + : "=q" (pfo_ret__) \ : constraint); \ break; \ case 2: \ asm(op "w "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ case 4: \ asm(op "l "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ case 8: \ asm(op "q "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ default: __bad_percpu_size(); \ } \ - ret__; \ + pfo_ret__; \ }) /* diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 9bd03193ecd4..5a5d6ce4bd55 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -60,6 +60,7 @@ #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ __typeof__(type) per_cpu__##name diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 519d6876590f..522f421ec213 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -226,20 +226,20 @@ do { \ extern void __bad_size_call_parameter(void); -#define __size_call_return(stem, variable) \ -({ typeof(variable) ret__; \ +#define __pcpu_size_call_return(stem, variable) \ +({ typeof(variable) pscr_ret__; \ switch(sizeof(variable)) { \ - case 1: ret__ = stem##1(variable);break; \ - case 2: ret__ = stem##2(variable);break; \ - case 4: ret__ = stem##4(variable);break; \ - case 8: ret__ = stem##8(variable);break; \ + case 1: pscr_ret__ = stem##1(variable);break; \ + case 2: pscr_ret__ = stem##2(variable);break; \ + case 4: pscr_ret__ = stem##4(variable);break; \ + case 8: pscr_ret__ = stem##8(variable);break; \ default: \ __bad_size_call_parameter();break; \ } \ - ret__; \ + pscr_ret__; \ 
}) -#define __size_call(stem, variable, ...) \ +#define __pcpu_size_call(stem, variable, ...) \ do { \ switch(sizeof(variable)) { \ case 1: stem##1(variable, __VA_ARGS__);break; \ @@ -299,7 +299,7 @@ do { \ # ifndef this_cpu_read_8 # define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) # endif -# define this_cpu_read(pcp) __size_call_return(this_cpu_read_, (pcp)) +# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) #endif #define _this_cpu_generic_to_op(pcp, val, op) \ @@ -322,7 +322,7 @@ do { \ # ifndef this_cpu_write_8 # define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif -# define this_cpu_write(pcp, val) __size_call(this_cpu_write_, (pcp), (val)) +# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) #endif #ifndef this_cpu_add @@ -338,7 +338,7 @@ do { \ # ifndef this_cpu_add_8 # define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) # endif -# define this_cpu_add(pcp, val) __size_call(this_cpu_add_, (pcp), (val)) +# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) #endif #ifndef this_cpu_sub @@ -366,7 +366,7 @@ do { \ # ifndef this_cpu_and_8 # define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) # endif -# define this_cpu_and(pcp, val) __size_call(this_cpu_and_, (pcp), (val)) +# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) #endif #ifndef this_cpu_or @@ -382,7 +382,7 @@ do { \ # ifndef this_cpu_or_8 # define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif -# define this_cpu_or(pcp, val) __size_call(this_cpu_or_, (pcp), (val)) +# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) #endif #ifndef this_cpu_xor @@ -398,7 +398,7 @@ do { \ # ifndef this_cpu_xor_8 # define this_cpu_xor_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), ^=) # endif -# define this_cpu_xor(pcp, val) __size_call(this_cpu_or_, (pcp), (val)) +# define 
this_cpu_xor(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) #endif /* @@ -428,7 +428,7 @@ do { \ # ifndef __this_cpu_read_8 # define __this_cpu_read_8(pcp) (*__this_cpu_ptr(&(pcp))) # endif -# define __this_cpu_read(pcp) __size_call_return(__this_cpu_read_, (pcp)) +# define __this_cpu_read(pcp) __pcpu_size_call_return(__this_cpu_read_, (pcp)) #endif #define __this_cpu_generic_to_op(pcp, val, op) \ @@ -449,7 +449,7 @@ do { \ # ifndef __this_cpu_write_8 # define __this_cpu_write_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) # endif -# define __this_cpu_write(pcp, val) __size_call(__this_cpu_write_, (pcp), (val)) +# define __this_cpu_write(pcp, val) __pcpu_size_call(__this_cpu_write_, (pcp), (val)) #endif #ifndef __this_cpu_add @@ -465,7 +465,7 @@ do { \ # ifndef __this_cpu_add_8 # define __this_cpu_add_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) # endif -# define __this_cpu_add(pcp, val) __size_call(__this_cpu_add_, (pcp), (val)) +# define __this_cpu_add(pcp, val) __pcpu_size_call(__this_cpu_add_, (pcp), (val)) #endif #ifndef __this_cpu_sub @@ -493,7 +493,7 @@ do { \ # ifndef __this_cpu_and_8 # define __this_cpu_and_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) # endif -# define __this_cpu_and(pcp, val) __size_call(__this_cpu_and_, (pcp), (val)) +# define __this_cpu_and(pcp, val) __pcpu_size_call(__this_cpu_and_, (pcp), (val)) #endif #ifndef __this_cpu_or @@ -509,7 +509,7 @@ do { \ # ifndef __this_cpu_or_8 # define __this_cpu_or_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) # endif -# define __this_cpu_or(pcp, val) __size_call(__this_cpu_or_, (pcp), (val)) +# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val)) #endif #ifndef __this_cpu_xor @@ -525,7 +525,7 @@ do { \ # ifndef __this_cpu_xor_8 # define __this_cpu_xor_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), ^=) # endif -# define __this_cpu_xor(pcp, val) __size_call(__this_cpu_xor_, (pcp), (val)) +# define __this_cpu_xor(pcp, val) 
__pcpu_size_call(__this_cpu_xor_, (pcp), (val)) #endif /* @@ -556,7 +556,7 @@ do { \ # ifndef irqsafe_cpu_add_8 # define irqsafe_cpu_add_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), +=) # endif -# define irqsafe_cpu_add(pcp, val) __size_call(irqsafe_cpu_add_, (pcp), (val)) +# define irqsafe_cpu_add(pcp, val) __pcpu_size_call(irqsafe_cpu_add_, (pcp), (val)) #endif #ifndef irqsafe_cpu_sub @@ -584,7 +584,7 @@ do { \ # ifndef irqsafe_cpu_and_8 # define irqsafe_cpu_and_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), &=) # endif -# define irqsafe_cpu_and(pcp, val) __size_call(irqsafe_cpu_and_, (val)) +# define irqsafe_cpu_and(pcp, val) __pcpu_size_call(irqsafe_cpu_and_, (val)) #endif #ifndef irqsafe_cpu_or @@ -600,7 +600,7 @@ do { \ # ifndef irqsafe_cpu_or_8 # define irqsafe_cpu_or_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), |=) # endif -# define irqsafe_cpu_or(pcp, val) __size_call(irqsafe_cpu_or_, (val)) +# define irqsafe_cpu_or(pcp, val) __pcpu_size_call(irqsafe_cpu_or_, (val)) #endif #ifndef irqsafe_cpu_xor @@ -616,7 +616,7 @@ do { \ # ifndef irqsafe_cpu_xor_8 # define irqsafe_cpu_xor_8(pcp, val) irqsafe_cpu_generic_to_op((pcp), (val), ^=) # endif -# define irqsafe_cpu_xor(pcp, val) __size_call(irqsafe_cpu_xor_, (val)) +# define irqsafe_cpu_xor(pcp, val) __pcpu_size_call(irqsafe_cpu_xor_, (val)) #endif #endif /* __LINUX_PERCPU_H */ diff --git a/mm/percpu.c b/mm/percpu.c index ec158bb5f86d..e2e80fc78601 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -365,6 +365,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * 0 if noop, 1 if successfully extended, -errno on failure. */ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) + __releases(lock) __acquires(lock) { int new_alloc; int *new; -- cgit v1.2.3