From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: module: Use this_cpu_xx to dynamically allocate counters Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory requirements, cache footprint and decreases cycle counts. The this_cpu_xx operations are also used for !SMP mode. Otherwise we could not drop the use of __module_ref_addr() which would make per cpu data handling complicated. this_cpu_xx operations have their own fallback for !SMP. V8-V9: - Leave include asm/module.h since ringbuffer.c depends on it. Nothing else does though. Another patch will deal with that. - Remove spurious free. Signed-off-by: Christoph Lameter Acked-by: Rusty Russell Signed-off-by: Tejun Heo --- include/linux/module.h | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 6cb1a3cab5d3..2302f09ea2d9 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -363,11 +364,9 @@ struct module /* Destruction function. */ void (*exit)(void); -#ifdef CONFIG_SMP - char *refptr; -#else - local_t ref; -#endif + struct module_ref { + int count; + } *refptr; #endif #ifdef CONFIG_CONSTRUCTORS @@ -454,25 +453,16 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); -static inline local_t *__module_ref_addr(struct module *mod, int cpu) -{ -#ifdef CONFIG_SMP - return (local_t *) (mod->refptr + per_cpu_offset(cpu)); -#else - return &mod->ref; -#endif -} - /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_inc(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); - put_cpu(); + __this_cpu_read(module->refptr->count)); + preempt_enable(); } } @@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module) int ret = 1; if (module) { - unsigned int cpu = get_cpu(); + preempt_disable(); + if (likely(module_is_live(module))) { - local_inc(__module_ref_addr(module, cpu)); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); } else ret = 0; - put_cpu(); + + preempt_enable(); } return ret; } -- cgit v1.2.3 From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ringbuffer*.c are the last users of local.h. Remove the include from modules.h and add it to ringbuffer files. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09ea2d9..7e74ae0051cc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..eb6c8988c31a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477caf09c2..df74c7982255 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit v1.2.3 From 43cf38eb5cea91245502df3fcee4dbfc1c74dd1c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:38:57 +0900 Subject: percpu: add __percpu sparse annotations to core kernel subsystems Add __percpu sparse annotations to core subsystems. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Paul E. McKenney Cc: Jens Axboe Cc: linux-mm@kvack.org Cc: Rusty Russell Cc: Dipankar Sarma Cc: Peter Zijlstra Cc: Andrew Morton Cc: Eric Biederman --- include/linux/blktrace_api.h | 4 ++-- include/linux/genhd.h | 2 +- include/linux/kexec.h | 2 +- include/linux/mmzone.h | 2 +- include/linux/module.h | 2 +- include/linux/percpu_counter.h | 2 +- include/linux/srcu.h | 2 +- kernel/kexec.c | 2 +- kernel/sched.c | 4 ++-- kernel/stop_machine.c | 2 +- mm/percpu.c | 18 ++++++++++-------- 11 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b73b9992b26..416bf62d6d46 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -150,8 +150,8 @@ struct blk_user_trace_setup { struct blk_trace { int trace_state; struct rchan *rchan; - unsigned long *sequence; - unsigned char *msg_data; + unsigned long __percpu *sequence; + unsigned char __percpu *msg_data; u16 act_mask; u64 start_lba; u64 end_lba; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9717081c75ad..56b50514ab25 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -101,7 +101,7 @@ struct hd_struct { unsigned long stamp; int in_flight[2]; #ifdef CONFIG_SMP - struct disk_stats *dkstats; + struct disk_stats __percpu *dkstats; #else struct disk_stats dkstats; #endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c356b6914ffd..03e8e8dbc577 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -199,7 +199,7 @@ extern struct kimage *kexec_crash_image; */ extern struct resource crashk_res; typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; -extern note_buf_t *crash_notes; +extern note_buf_t __percpu *crash_notes; extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7874201a3556..41acd4bf7664 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -301,7 +301,7 @@ struct zone { unsigned long min_unmapped_pages; unsigned long min_slab_pages; #endif - struct per_cpu_pageset *pageset; + struct per_cpu_pageset __percpu *pageset; /* * free areas of different sizes */ diff --git a/include/linux/module.h b/include/linux/module.h index 7e74ae0051cc..dd618eb026aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -365,7 +365,7 @@ struct module struct module_ref { int count; - } *refptr; + } __percpu *refptr; #endif #ifdef CONFIG_CONSTRUCTORS diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a7684a513994..9bd103c844ee 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -21,7 +21,7 @@ struct percpu_counter { #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ #endif - s32 *counters; + s32 __percpu *counters; }; extern int percpu_counter_batch; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4765d97dcafb..41eedccc962c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -33,7 +33,7 @@ struct srcu_struct_array { struct srcu_struct { int completed; - struct srcu_struct_array *per_cpu_ref; + struct srcu_struct_array __percpu *per_cpu_ref; struct mutex mutex; }; diff --git a/kernel/kexec.c b/kernel/kexec.c index ef077fb73155..87ebe8adc474 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -41,7 +41,7 @@ #include /* Per cpu memory for storing cpu states in case of system crash. */ -note_buf_t* crash_notes; +note_buf_t __percpu *crash_notes; /* vmcoreinfo stuff */ static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; diff --git a/kernel/sched.c b/kernel/sched.c index 3a8fb30a91b1..978edfd35a96 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1566,7 +1566,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED -static __read_mostly unsigned long *update_shares_data; +static __read_mostly unsigned long __percpu *update_shares_data; static void __set_se_shares(struct sched_entity *se, unsigned long shares); @@ -10683,7 +10683,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ - u64 *cpuusage; + u64 __percpu *cpuusage; struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; }; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 912823e2a11b..9bb9fb1bd79c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -45,7 +45,7 @@ static int refcount; static struct workqueue_struct *stop_machine_wq; static struct stop_machine_data active, idle; static const struct cpumask *active_cpus; -static void *stop_machine_work; +static void __percpu *stop_machine_work; static void set_state(enum stopmachine_state newstate) { diff --git a/mm/percpu.c b/mm/percpu.c index b336638d20e7..768419d44ad7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,13 +80,15 @@ /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ - + (unsigned long)__per_cpu_start) + (void __percpu *)((unsigned long)(addr) - \ + (unsigned long)pcpu_base_addr + \ + (unsigned long)__per_cpu_start) #endif #ifndef __pcpu_ptr_to_addr #define __pcpu_ptr_to_addr(ptr) \ - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ - - (unsigned long)__per_cpu_start) + (void __force *)((unsigned long)(ptr) + \ + (unsigned long)pcpu_base_addr - \ + (unsigned long)__per_cpu_start) #endif struct pcpu_chunk { @@ -1065,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_alloc(size_t size, size_t align, bool reserved) +static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) { static int warn_limit = 10; struct pcpu_chunk *chunk; @@ -1194,7 +1196,7 @@ fail_unlock_mutex: * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_percpu(size_t size, size_t align) +void __percpu *__alloc_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, false); } @@ -1215,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_reserved_percpu(size_t size, size_t align) +void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, true); } @@ -1267,7 +1269,7 @@ static void pcpu_reclaim(struct work_struct *work) * CONTEXT: * Can be called from atomic context. */ -void free_percpu(void *ptr) +void free_percpu(void __percpu *ptr) { void *addr; struct pcpu_chunk *chunk; -- cgit v1.2.3 From 5ed109103d73b0bafc92e860cead56725231384d Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 10 Mar 2010 15:24:06 -0800 Subject: sysctl extern cleanup: module Extern declarations in sysctl.c should be moved to their own header file, and then include them in relavant .c files. Move modprobe_path extern declaration to linux/kmod.h Move modules_disabled extern declaration to linux/module.h Signed-off-by: Dave Young Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 1 + include/linux/module.h | 1 + kernel/sysctl.c | 4 ---- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 384ca8bbf1ac..facb27fe7de0 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -27,6 +27,7 @@ #define KMOD_PATH_LEN 256 #ifdef CONFIG_MODULES +extern char modprobe_path[]; /* for sysctl */ /* modprobe exit status on success, -ve on error. Return value * usually useless though. */ extern int __request_module(bool wait, const char *name, ...) \ diff --git a/include/linux/module.h b/include/linux/module.h index dd618eb026aa..5e869ffd34aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -175,6 +175,7 @@ struct notifier_block; #ifdef CONFIG_MODULES +extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f18aaa7b0d65..44e9492368fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -116,10 +116,6 @@ static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; -#ifdef CONFIG_MODULES -extern char modprobe_path[]; -extern int modules_disabled; -#endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif -- cgit v1.2.3 From 259354deaaf03d49a02dbb9975d6ec2a54675672 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:56:10 +0900 Subject: module: encapsulate percpu handling better and record percpu_size Better encapsulate module static percpu area handling so that code outsidef of CONFIG_SMP ifdef doesn't deal with mod->percpu directly and add mod->percpu_size and record percpu_size in it. Both percpu fields are compiled out on UP. While at it, mark mod->percpu w/ __percpu. This is to prepare for is_module_percpu_address(). Signed-off-by: Tejun Heo Acked-by: Rusty Russell --- include/linux/module.h | 5 +++- kernel/module.c | 66 ++++++++++++++++++++++++++------------------------ 2 files changed, 39 insertions(+), 32 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 5e869ffd34aa..87d247ac6761 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -330,8 +330,11 @@ struct module struct module_notes_attrs *notes_attrs; #endif +#ifdef CONFIG_SMP /* Per-cpu data. */ - void *percpu; + void __percpu *percpu; + unsigned int percpu_size; +#endif /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ diff --git a/kernel/module.c b/kernel/module.c index c968d3606dca..e7a6e53fc73e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { - void *ptr; + return mod->percpu; +} +static int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); - if (!ptr) + mod->percpu = __alloc_reserved_percpu(size, align); + if (!mod->percpu) { printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); - return ptr; + return -ENOMEM; + } + mod->percpu_size = size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - free_percpu(freeme); + free_percpu(mod->percpu); } static unsigned int find_pcpusec(Elf_Ehdr *hdr, @@ -400,24 +406,28 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static inline int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ + return -ENOMEM; +} +static inline void percpu_modfree(struct module *mod) { - BUG(); } static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -425,8 +435,8 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); @@ -1400,8 +1410,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); + percpu_modfree(mod); #if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) free_percpu(mod->refptr); @@ -1520,7 +1529,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + secbase = (unsigned long)mod_percpu(mod); else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1954,7 +1963,7 @@ static noinline struct module *load_module(void __user *umod, unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; mm_segment_t old_fs; @@ -2094,15 +2103,11 @@ static noinline struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; + err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, + sechdrs[pcpuindex].sh_addralign); + if (err) goto free_mod; - } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; } /* Determine total sizes, and put offsets in sh_entsize. For now @@ -2317,7 +2322,7 @@ static noinline struct module *load_module(void __user *umod, sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, + percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, @@ -2409,8 +2414,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - if (percpu) - percpu_modfree(percpu); + percpu_modfree(mod); free_mod: kfree(args); kfree(strmap); -- cgit v1.2.3 From 10fad5e46f6c7bdfb01b1a012380a38e3c6ab346 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:57:54 +0900 Subject: percpu, module: implement and use is_kernel/module_percpu_address() lockdep has custom code to check whether a pointer belongs to static percpu area which is somewhat broken. Implement proper is_kernel/module_percpu_address() and replace the custom code. On UP, percpu variables are regular static variables and can't be distinguished from them. Always return %false on UP. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Rusty Russell Cc: Ingo Molnar --- include/linux/module.h | 1 + include/linux/percpu.h | 7 +++++++ kernel/lockdep.c | 21 +++++---------------- kernel/module.c | 38 ++++++++++++++++++++++++++++++++++++++ mm/percpu.c | 26 ++++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 16 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 87d247ac6761..f0e2659f4e3e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -395,6 +395,7 @@ static inline int module_is_live(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); +bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); static inline int within_module_core(unsigned long addr, struct module *mod) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a93e5bfdccb8..11d5f834b54a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -137,6 +137,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); +extern bool is_kernel_percpu_address(unsigned long addr); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA @@ -163,6 +164,12 @@ static inline void free_percpu(void __percpu *p) kfree(p); } +/* can't distinguish from other static vars, always false */ +static inline bool is_kernel_percpu_address(unsigned long addr) +{ + return false; +} + static inline phys_addr_t per_cpu_ptr_to_phys(void *addr) { return __pa(addr); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c927a549db2c..9bbb9c841e48 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -582,9 +582,6 @@ static int static_obj(void *obj) unsigned long start = (unsigned long) &_stext, end = (unsigned long) &_end, addr = (unsigned long) obj; -#ifdef CONFIG_SMP - int i; -#endif /* * static variable? @@ -595,24 +592,16 @@ static int static_obj(void *obj) if (arch_is_kernel_data(addr)) return 1; -#ifdef CONFIG_SMP /* - * percpu var? + * in-kernel percpu var? */ - for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); - - if ((addr >= start) && (addr < end)) - return 1; - } -#endif + if (is_kernel_percpu_address(addr)) + return 1; /* - * module var? + * module static or percpu var? */ - return is_module_address(addr); + return is_module_address(addr) || is_module_percpu_address(addr); } /* diff --git a/kernel/module.c b/kernel/module.c index e7a6e53fc73e..9f8d23d8b3a8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -415,6 +415,40 @@ static void percpu_modcopy(struct module *mod, memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) +{ + struct module *mod; + unsigned int cpu; + + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { + if (!mod->percpu_size) + continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); + + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } + } + } + + preempt_enable(); + return false; +} + #else /* ... !CONFIG_SMP */ static inline void __percpu *mod_percpu(struct module *mod) @@ -441,6 +475,10 @@ static inline void percpu_modcopy(struct module *mod, /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} #endif /* CONFIG_SMP */ diff --git a/mm/percpu.c b/mm/percpu.c index 768419d44ad7..6e09741ddc62 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1303,6 +1303,32 @@ void free_percpu(void __percpu *ptr) } EXPORT_SYMBOL_GPL(free_percpu); +/** + * is_kernel_percpu_address - test whether address is from static percpu area + * @addr: address to test + * + * Test whether @addr belongs to in-kernel static percpu area. Module + * static percpu areas are not considered. For those, use + * is_module_percpu_address(). + * + * RETURNS: + * %true if @addr is from in-kernel static percpu area, %false otherwise. + */ +bool is_kernel_percpu_address(unsigned long addr) +{ + const size_t static_size = __per_cpu_end - __per_cpu_start; + void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); + unsigned int cpu; + + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(base, cpu); + + if ((void *)addr >= start && (void *)addr < start + static_size) + return true; + } + return false; +} + /** * per_cpu_ptr_to_phys - convert translated percpu address to physical address * @addr: the address to be converted to physical address -- cgit v1.2.3 From d5e50daf92df8afcb701fd717b301985691e802f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 31 Mar 2010 11:33:42 +0900 Subject: module: add stub for is_module_percpu_address Fix build for CONFIG_MODULES not enabled by providing a stub for is_module_percpu_address(). kernel/lockdep.c:605: error: implicit declaration of function 'is_module_percpu_address' Signed-off-by: Randy Dunlap Signed-off-by: Tejun Heo --- include/linux/module.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index f0e2659f4e3e..8bd399a00343 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -567,6 +567,11 @@ static inline bool is_module_address(unsigned long addr) return false; } +static inline bool is_module_percpu_address(unsigned long addr) +{ + return false; +} + static inline bool is_module_text_address(unsigned long addr) { return false; -- cgit v1.2.3 From 5fbfb18d7a5b846946d52c4a10e3aaa213ec31b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 1 Apr 2010 19:09:40 +1100 Subject: Fix up possibly racy module refcounting Module refcounting is implemented with a per-cpu counter for speed. However there is a race when tallying the counter where a reference may be taken by one CPU and released by another. Reference count summation may then see the decrement without having seen the previous increment, leading to lower than expected count. A module which never has its actual reference drop below 1 may return a reference count of 0 due to this race. Module removal generally runs under stop_machine, which prevents this race causing bugs due to removal of in-use modules. However there are other real bugs in module.c code and driver code (module_refcount is exported) where the callers do not run under stop_machine. Fix this by maintaining running per-cpu counters for the number of module refcount increments and the number of refcount decrements. The increments are tallied after the decrements, so any decrement seen will always have its corresponding increment counted. The final refcount is the difference of the total increments and decrements, preventing a low-refcount from being returned. Signed-off-by: Nick Piggin Acked-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/module.h | 14 +++++++------- kernel/module.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 8bd399a00343..515d53ae6a79 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,7 +368,8 @@ struct module void (*exit)(void); struct module_ref { - int count; + unsigned int incs; + unsigned int decs; } __percpu *refptr; #endif @@ -463,9 +464,9 @@ static inline void __module_get(struct module *module) { if (module) { preempt_disable(); - __this_cpu_inc(module->refptr->count); + __this_cpu_inc(module->refptr->incs); trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->incs)); preempt_enable(); } } @@ -478,11 +479,10 @@ static inline int try_module_get(struct module *module) preempt_disable(); if (likely(module_is_live(module))) { - __this_cpu_inc(module->refptr->count); + __this_cpu_inc(module->refptr->incs); trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); - } - else + __this_cpu_read(module->refptr->incs)); + } else ret = 0; preempt_enable(); diff --git a/kernel/module.c b/kernel/module.c index 9f8d23d8b3a8..1016b75b026a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -521,11 +521,13 @@ static void module_unload_init(struct module *mod) int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for_each_possible_cpu(cpu) - per_cpu_ptr(mod->refptr, cpu)->count = 0; + for_each_possible_cpu(cpu) { + per_cpu_ptr(mod->refptr, cpu)->incs = 0; + per_cpu_ptr(mod->refptr, cpu)->decs = 0; + } /* Hold reference count during initialization. */ - __this_cpu_write(mod->refptr->count, 1); + __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -664,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int total = 0; + unsigned int incs = 0, decs = 0; int cpu; for_each_possible_cpu(cpu) - total += per_cpu_ptr(mod->refptr, cpu)->count; - return total; + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); @@ -846,10 +864,11 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - __this_cpu_dec(module->refptr->count); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->decs)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); -- cgit v1.2.3