From b8d317d10cca76cabe6b03ebfeb23cc99118b731 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 24 Jul 2008 18:21:29 -0700 Subject: cpumask: make cpumask_of_cpu_map generic If an arch doesn't define cpumask_of_cpu_map, create a generic statically-initialized one for them. This allows removal of the buggy cpumask_of_cpu() macro (&cpumask_of_cpu() gives address of out-of-scope var). An arch with NR_CPUS of 4096 probably wants to allocate this itself based on the actual number of CPUs, since otherwise they're using 2MB of rodata (1024 cpus means 128k). That's what CONFIG_HAVE_CPUMASK_OF_CPU_MAP is for (only x86/64 does so at the moment). In future as we support more CPUs, we'll need to resort to a get_cpu_map()/put_cpu_map() allocation scheme. Signed-off-by: Mike Travis Signed-off-by: Rusty Russell Cc: Andrew Morton Cc: Jack Steiner Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 41 +++-------------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1b5c98e7fef7..8fa3b6d4a320 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -62,15 +62,7 @@ * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids * * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set - *ifdef CONFIG_HAS_CPUMASK_OF_CPU - * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t *v - * cpumask_of_cpu_ptr_next(v, cpu) Sets v = &cpumask_of_cpu_map[cpu] - * cpumask_of_cpu_ptr(v, cpu) Combines above two operations - *else - * cpumask_of_cpu_ptr_declare(v) Declares cpumask_t _v and *v = &_v - * cpumask_of_cpu_ptr_next(v, cpu) Sets _v = cpumask_of_cpu(cpu) - * cpumask_of_cpu_ptr(v, cpu) Combines above two operations - *endif + * (can be used as an lvalue) * CPU_MASK_ALL Initializer - all bits set * CPU_MASK_NONE Initializer - no bits set * unsigned long *cpus_addr(mask) Array of unsigned long's in mask @@ -274,36 +266,9 @@ static inline void __cpus_shift_left(cpumask_t *dstp, } -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -extern cpumask_t *cpumask_of_cpu_map; +/* cpumask_of_cpu_map[] is in kernel/cpu.c */ +extern const cpumask_t *cpumask_of_cpu_map; #define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) -#define cpumask_of_cpu_ptr(v, cpu) \ - const cpumask_t *v = &cpumask_of_cpu(cpu) -#define cpumask_of_cpu_ptr_declare(v) \ - const cpumask_t *v -#define cpumask_of_cpu_ptr_next(v, cpu) \ - v = &cpumask_of_cpu(cpu) -#else -#define cpumask_of_cpu(cpu) \ -({ \ - typeof(_unused_cpumask_arg_) m; \ - if (sizeof(m) == sizeof(unsigned long)) { \ - m.bits[0] = 1UL<<(cpu); \ - } else { \ - cpus_clear(m); \ - cpu_set((cpu), m); \ - } \ - m; \ -}) -#define cpumask_of_cpu_ptr(v, cpu) \ - cpumask_t _##v = cpumask_of_cpu(cpu); \ - const cpumask_t *v = &_##v -#define cpumask_of_cpu_ptr_declare(v) \ - cpumask_t _##v; \ - const cpumask_t *v = &_##v -#define cpumask_of_cpu_ptr_next(v, cpu) \ - _##v = cpumask_of_cpu(cpu) -#endif #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) -- cgit v1.2.3 From e56b3bc7942982ac2589c942fb345e38bc7a341a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Jul 2008 11:32:33 -0700 Subject: cpu masks: optimize and clean up cpumask_of_cpu() Clean up and optimize cpumask_of_cpu(), by sharing all the zero words. Instead of stupidly generating all possible i=0...NR_CPUS 2^i patterns creating a huge array of constant bitmasks, realize that the zero words can be shared. In other words, on a 64-bit architecture, we only ever need 64 of these arrays - with a different bit set in one single world (with enough zero words around it so that we can create any bitmask by just offsetting in that big array). And then we just put enough zeroes around it that we can point every single cpumask to be one of those things. So when we have 4k CPU's, instead of having 4k arrays (of 4k bits each, with one bit set in each array - 2MB memory total), we have exactly 64 arrays instead, each 8k bits in size (64kB total). And then we just point cpumask(n) to the right position (which we can calculate dynamically). Once we have the right arrays, getting "cpumask(n)" ends up being: static inline const cpumask_t *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return (const cpumask_t *)p; } This brings other advantages and simplifications as well: - we are not wasting memory that is just filled with a single bit in various different places - we don't need all those games to re-create the arrays in some dense format, because they're already going to be dense enough. if we compile a kernel for up to 4k CPU's, "wasting" that 64kB of memory is a non-issue (especially since by doing this "overlapping" trick we probably get better cache behaviour anyway). [ mingo@elte.hu: Converted Linus's mails into a commit. See: http://lkml.org/lkml/2008/7/27/156 http://lkml.org/lkml/2008/7/28/320 Also applied a family filter - which also has the side-effect of leaving out the bits where Linus calls me an idio... Oh, never mind ;-) ] Signed-off-by: Ingo Molnar Cc: Rusty Russell Cc: Andrew Morton Cc: Al Viro Cc: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 23 -------- include/linux/cpumask.h | 26 ++++++++- kernel/cpu.c | 128 +++++++---------------------------------- 3 files changed, 43 insertions(+), 134 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1cd53dfcd309..76e305e064f9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -80,26 +80,6 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP -/* - * Replace static cpumask_of_cpu_map in the initdata section, - * with one that's allocated sized by the possible number of cpus. - * - * (requires nr_cpu_ids to be initialized) - */ -static void __init setup_cpumask_of_cpu(void) -{ - int i; - - /* alloc_bootmem zeroes memory */ - cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); - for (i = 0; i < nr_cpu_ids; i++) - cpu_set(i, cpumask_of_cpu_map[i]); -} -#else -static inline void setup_cpumask_of_cpu(void) { } -#endif - #ifdef CONFIG_X86_32 /* * Great future not-so-futuristic plan: make i386 and x86_64 do it @@ -199,9 +179,6 @@ void __init setup_per_cpu_areas(void) /* Setup node to cpumask map */ setup_node_to_cpumask_map(); - - /* Setup cpumask_of_cpu map */ - setup_cpumask_of_cpu(); } #endif diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fa3b6d4a320..96d0509fb8d8 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -265,10 +265,30 @@ static inline void __cpus_shift_left(cpumask_t *dstp, bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); } +/* + * Special-case data structure for "single bit set only" constant CPU masks. + * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. + */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const cpumask_t *get_cpu_mask(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + p -= cpu / BITS_PER_LONG; + return (const cpumask_t *)p; +} + +/* + * In cases where we take the address of the cpumask immediately, + * gcc optimizes it out (it's a constant) and there's no huge stack + * variable created: + */ +#define cpumask_of_cpu(cpu) ({ *get_cpu_mask(cpu); }) -/* cpumask_of_cpu_map[] is in kernel/cpu.c */ -extern const cpumask_t *cpumask_of_cpu_map; -#define cpumask_of_cpu(cpu) (cpumask_of_cpu_map[cpu]) #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) diff --git a/kernel/cpu.c b/kernel/cpu.c index a35d8995dc8c..06a8358bb418 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -462,115 +462,27 @@ out: #endif /* CONFIG_SMP */ -/* 64 bits of zeros, for initializers. */ -#if BITS_PER_LONG == 32 -#define Z64 0, 0 -#else -#define Z64 0 -#endif +/* + * cpu_bit_bitmap[] is a special, "compressed" data structure that + * represents all NR_CPUS bits binary values of 1< 4 - CMI0(4), CMI0(5), CMI0(6), CMI0(7), -#endif -#if NR_CPUS > 8 - CMI0(8), CMI0(9), CMI0(10), CMI0(11), - CMI0(12), CMI0(13), CMI0(14), CMI0(15), -#endif -#if NR_CPUS > 16 - CMI0(16), CMI0(17), CMI0(18), CMI0(19), - CMI0(20), CMI0(21), CMI0(22), CMI0(23), - CMI0(24), CMI0(25), CMI0(26), CMI0(27), - CMI0(28), CMI0(29), CMI0(30), CMI0(31), -#endif -#if NR_CPUS > 32 -#if BITS_PER_LONG == 32 - CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0), - CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0), - CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0), - CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0), - CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0), - CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0), - CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0), - CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0), -#else - CMI0(32), CMI0(33), CMI0(34), CMI0(35), - CMI0(36), CMI0(37), CMI0(38), CMI0(39), - CMI0(40), CMI0(41), CMI0(42), CMI0(43), - CMI0(44), CMI0(45), CMI0(46), CMI0(47), - CMI0(48), CMI0(49), CMI0(50), CMI0(51), - CMI0(52), CMI0(53), CMI0(54), CMI0(55), - CMI0(56), CMI0(57), CMI0(58), CMI0(59), - CMI0(60), CMI0(61), CMI0(62), CMI0(63), -#endif /* BITS_PER_LONG == 64 */ -#endif -#if NR_CPUS > 64 - CMI64(64, Z64), -#endif -#if NR_CPUS > 128 - CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64), -#endif -#if NR_CPUS > 256 - CMI256(256, Z256), -#endif -#if NR_CPUS > 512 - CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256), -#endif -#if NR_CPUS > 1024 - CMI1024(1024, Z1024), -#endif -#if NR_CPUS > 2048 - CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024), -#endif -#if NR_CPUS > 4096 -#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP +const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { + + MASK_DECLARE_8(0), MASK_DECLARE_8(8), + MASK_DECLARE_8(16), MASK_DECLARE_8(24), +#if BITS_PER_LONG > 32 + MASK_DECLARE_8(32), MASK_DECLARE_8(40), + MASK_DECLARE_8(48), MASK_DECLARE_8(56), #endif }; - -const cpumask_t *cpumask_of_cpu_map = cpumask_map; - -EXPORT_SYMBOL_GPL(cpumask_of_cpu_map); +EXPORT_SYMBOL_GPL(cpu_bit_bitmap); -- cgit v1.2.3