From 90f62cf30a78721641e08737bda787552428061e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:29:27 -0700 Subject: net: Use netlink_ns_capable to verify the permisions of netlink messages It is possible by passing a netlink socket to a more privileged executable and then to fool that executable into writing to the socket data that happens to be valid netlink message to do something that privileged executable did not intend to do. To keep this from happening replace bare capable and ns_capable calls with netlink_capable, netlink_net_calls and netlink_ns_capable calls. Which act the same as the previous calls except they verify that the opener of the socket had the desired permissions as well. Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- kernel/audit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 7c2893602d06..47845c57eb19 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -643,13 +643,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) if ((task_active_pid_ns(current) != &init_pid_ns)) return -EPERM; - if (!capable(CAP_AUDIT_CONTROL)) + if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: - if (!capable(CAP_AUDIT_WRITE)) + if (!netlink_capable(skb, CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ -- cgit v1.2.3 From 79465d2fd48e68940c2bdecddbdecd45bbba06fe Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 28 Apr 2014 11:05:43 +0930 Subject: module: remove warning about waiting module removal. We remove the waiting module removal in commit 3f2b9c9cdf38 (September 2013), but it turns out that modprobe in kmod (< version 16) was asking for waiting module removal. No one noticed since modprobe would check for 0 usage immediately before trying to remove the module, and the race is unlikely. However, it means that anyone running old (but not ancient) kmod versions is hitting the printk designed to see if anyone was running "rmmod -w". All reports so far have been false positives, so remove the warning. Fixes: 3f2b9c9cdf389e303b2273679af08aab5f153517 Reported-by: Valerio Vanni Cc: Elliott, Robert (Server Storage) Cc: stable@kernel.org Acked-by: Lucas De Marchi Signed-off-by: Rusty Russell --- kernel/module.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 11869408f79b..ae7821898bf2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -815,9 +815,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, return -EFAULT; name[MODULE_NAME_LEN-1] = '\0'; - if (!(flags & O_NONBLOCK)) - pr_warn("waiting module removal not supported: please upgrade\n"); - if (mutex_lock_interruptible(&module_mutex) != 0) return -EINTR; -- cgit v1.2.3 From 62a08ae2a5763aabeee98264605236b001503e0c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Apr 2014 09:50:53 +0200 Subject: genirq: x86: Ensure that dynamic irq allocation does not conflict On x86 the allocation of irq descriptors may allocate interrupts which are in the range of the GSI interrupts. That's wrong as those interrupts are hardwired and we don't have the irq domain translation like PPC. So one of these interrupts can be hooked up later to one of the devices which are hard wired to it and the io_apic init code for that particular interrupt line happily reuses that descriptor with a completely different configuration so hell breaks lose. Inside x86 we allocate dynamic interrupts from above nr_gsi_irqs, except for a few usage sites which have not yet blown up in our face for whatever reason. But for drivers which need an irq range, like the GPIO drivers, we have no limit in place and we don't want to expose such a detail to a driver. To cure this introduce a function which an architecture can implement to impose a lower bound on the dynamic interrupt allocations. Implement it for x86 and set the lower bound to nr_gsi_irqs, which is the end of the hardwired interrupt space, so all dynamic allocations happen above. That not only allows the GPIO driver to work sanely, it also protects the bogus callsites of create_irq_nr() in hpet, uv, irq_remapping and htirq code. They need to be cleaned up as well, but that's a separate issue. Reported-by: Jin Yao Signed-off-by: Thomas Gleixner Tested-by: Mika Westerberg Cc: Mathias Nyman Cc: Linus Torvalds Cc: Grant Likely Cc: H. Peter Anvin Cc: Rafael J. Wysocki Cc: Andy Shevchenko Cc: Krogerus Heikki Cc: Linus Walleij Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1404241617360.28206@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 5 +++++ include/linux/irq.h | 2 ++ kernel/irq/irqdesc.c | 7 +++++++ kernel/softirq.c | 5 +++++ 4 files changed, 19 insertions(+) (limited to 'kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6ad4658de705..d23aa82e7a7b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3425,6 +3425,11 @@ int get_nr_irqs_gsi(void) return nr_irqs_gsi; } +unsigned int arch_dynirq_lower_bound(unsigned int from) +{ + return from < nr_irqs_gsi ? nr_irqs_gsi : from; +} + int __init arch_probe_nr_irqs(void) { int nr; diff --git a/include/linux/irq.h b/include/linux/irq.h index 10a0b1ac4ea0..5c57efb863d0 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -603,6 +603,8 @@ static inline u32 irq_get_trigger_type(unsigned int irq) return d ? irqd_get_trigger_type(d) : 0; } +unsigned int arch_dynirq_lower_bound(unsigned int from); + int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, struct module *owner); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a7174617616b..bb07f2928f4b 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -363,6 +363,13 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, if (from > irq) return -EINVAL; from = irq; + } else { + /* + * For interrupts which are freely allocated the + * architecture can force a lower bound to the @from + * argument. x86 uses this to exclude the GSI space. + */ + from = arch_dynirq_lower_bound(from); } mutex_lock(&sparse_irq_lock); diff --git a/kernel/softirq.c b/kernel/softirq.c index b50990a5bea0..33e4648ae0e7 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -779,3 +779,8 @@ int __init __weak arch_early_irq_init(void) { return 0; } + +unsigned int __weak arch_dynirq_lower_bound(unsigned int from) +{ + return from; +} -- cgit v1.2.3 From a949ae560a511fe4e3adf48fa44fefded93e5c2b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Apr 2014 10:40:12 -0400 Subject: ftrace/module: Hardcode ftrace_module_init() call into load_module() A race exists between module loading and enabling of function tracer. CPU 1 CPU 2 ----- ----- load_module() module->state = MODULE_STATE_COMING register_ftrace_function() mutex_lock(&ftrace_lock); ftrace_startup() update_ftrace_function(); ftrace_arch_code_modify_prepare() set_all_module_text_rw(); ftrace_arch_code_modify_post_process() set_all_module_text_ro(); [ here all module text is set to RO, including the module that is loading!! ] blocking_notifier_call_chain(MODULE_STATE_COMING); ftrace_init_module() [ tries to modify code, but it's RO, and fails! ftrace_bug() is called] When this race happens, ftrace_bug() will produces a nasty warning and all of the function tracing features will be disabled until reboot. The simple solution is to treate module load the same way the core kernel is treated at boot. To hardcode the ftrace function modification of converting calls to mcount into nops. This is done in init/main.c there's no reason it could not be done in load_module(). This gives a better control of the changes and doesn't tie the state of the module to its notifiers as much. Ftrace is special, it needs to be treated as such. The reason this would work, is that the ftrace_module_init() would be called while the module is in MODULE_STATE_UNFORMED, which is ignored by the set_all_module_text_ro() call. Link: http://lkml.kernel.org/r/1395637826-3312-1-git-send-email-indou.takao@jp.fujitsu.com Reported-by: Takao Indoh Acked-by: Rusty Russell Cc: stable@vger.kernel.org # 2.6.38+ Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/module.c | 3 +++ kernel/trace/ftrace.c | 27 ++++----------------------- 3 files changed, 9 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9212b017bc72..ae9504b4b67d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -535,6 +535,7 @@ static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_a extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); +extern void ftrace_module_init(struct module *mod); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); @@ -544,6 +545,7 @@ static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} +static inline void ftrace_module_init(struct module *mod) {} static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; diff --git a/kernel/module.c b/kernel/module.c index 11869408f79b..5f14fec9f825 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3271,6 +3271,9 @@ static int load_module(struct load_info *info, const char __user *uargs, dynamic_debug_setup(info->debug, info->num_debug); + /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ + ftrace_module_init(mod); + /* Finally it's fully formed, ready to start executing. */ err = complete_formation(mod, info); if (err) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1fd4b9479210..4a54a25afa2f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4330,16 +4330,11 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) +void ftrace_module_init(struct module *mod) { - struct module *mod = data; - - if (val == MODULE_STATE_COMING) - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); - return 0; + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4353,11 +4348,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, return 0; } #else -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} static int ftrace_module_notify_exit(struct notifier_block *self, unsigned long val, void *data) { @@ -4365,11 +4355,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, } #endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { - .notifier_call = ftrace_module_notify_enter, - .priority = INT_MAX, /* Run before anything that can use kprobes */ -}; - struct notifier_block ftrace_module_exit_nb = { .notifier_call = ftrace_module_notify_exit, .priority = INT_MIN, /* Run after anything that can remove kprobes */ @@ -4403,10 +4388,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_enter_nb); - if (ret) - pr_warning("Failed to register trace ftrace module enter notifier\n"); - ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n"); -- cgit v1.2.3 From 6c6c0d5a1c949d2e084706f9e5fb1fccc175b265 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Tue, 29 Apr 2014 17:55:02 -0500 Subject: hrtimer: Prevent all reprogramming if hang detected If the last hrtimer interrupt detected a hang it sets hang_detected=1 and programs the clock event device with a delay to let the system make progress. If hang_detected == 1, we prevent reprogramming of the clock event device in hrtimer_reprogram() but not in hrtimer_force_reprogram(). This can lead to the following situation: hrtimer_interrupt() hang_detected = 1; program ce device to Xms from now (hang delay) We have two timers pending: T1 expires 50ms from now T2 expires 5s from now Now T1 gets canceled, which causes hrtimer_force_reprogram() to be invoked, which in turn programs the clock event device to T2 (5 seconds from now). Any hrtimer_start after that will not reprogram the hardware due to hang_detected still being set. So we effectivly block all timers until the T2 event fires and cleans up the hang situation. Add a check for hang_detected to hrtimer_force_reprogram() which prevents the reprogramming of the hang delay in the hardware timer. The subsequent hrtimer_interrupt will resolve all outstanding issues. [ tglx: Rewrote subject and changelog and fixed up the comment in hrtimer_force_reprogram() ] Signed-off-by: Stuart Hayes Link: http://lkml.kernel.org/r/53602DC6.2060101@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d55092ceee29..e3724fdac2da 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -569,6 +569,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) cpu_base->expires_next.tv64 = expires_next.tv64; + /* + * If a hang was detected in the last timer interrupt then we + * leave the hang delay active in the hardware. We want the + * system to make progress. That also prevents the following + * scenario: + * T1 expires 50ms from now + * T2 expires 5s from now + * + * T1 is removed, so this code is called and would reprogram + * the hardware to 5s from now. Any hrtimer_start after that + * will not reprogram the hardware due to hang_detected being + * set. So we'd effectivly block all timers until the T2 event + * fires. + */ + if (cpu_base->hang_detected) + return; + if (cpu_base->expires_next.tv64 != KTIME_MAX) tick_program_event(cpu_base->expires_next, 1); } -- cgit v1.2.3 From 012a45e3f4af68e86d85cce060c6c2fed56498b2 Mon Sep 17 00:00:00 2001 From: Leon Ma Date: Wed, 30 Apr 2014 16:43:10 +0800 Subject: hrtimer: Prevent remote enqueue of leftmost timers If a cpu is idle and starts an hrtimer which is not pinned on that same cpu, the nohz code might target the timer to a different cpu. In the case that we switch the cpu base of the timer we already have a sanity check in place, which determines whether the timer is earlier than the current leftmost timer on the target cpu. In that case we enqueue the timer on the current cpu because we cannot reprogram the clock event device on the target. If the timers base is already the target CPU we do not have this sanity check in place so we enqueue the timer as the leftmost timer in the target cpus rb tree, but we cannot reprogram the clock event device on the target cpu. So the timer expires late and subsequently prevents the reprogramming of the target cpu clock event device until the previously programmed event fires or a timer with an earlier expiry time gets enqueued on the target cpu itself. Add the same target check as we have for the switch base case and start the timer on the current cpu if it would become the leftmost timer on the target. [ tglx: Rewrote subject and changelog ] Signed-off-by: Leon Ma Link: http://lkml.kernel.org/r/1398847391-5994-1-git-send-email-xindong.ma@intel.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e3724fdac2da..6b715c0af1b1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -234,6 +234,11 @@ again: goto again; } timer->base = new_base; + } else { + if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { + cpu = this_cpu; + goto again; + } } return new_base; } -- cgit v1.2.3 From 98a01e779f3c66b0b11cd7e64d531c0e41c95762 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Fri, 18 Apr 2014 17:23:11 +0200 Subject: timer: Prevent overflow in apply_slack On architectures with sizeof(int) < sizeof (long), the computation of mask inside apply_slack() can be undefined if the computed bit is > 32. E.g. with: expires = 0xffffe6f5 and slack = 25, we get: expires_limit = 0x20000000e bit = 33 mask = (1 << 33) - 1 /* undefined */ On x86, mask becomes 1 and and the slack is not applied properly. On s390, mask is -1, expires is set to 0 and the timer fires immediately. Use 1UL << bit to solve that issue. Suggested-by: Deborah Townsend Signed-off-by: Jiri Bohac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20140418152310.GA13654@midget.suse.cz Signed-off-by: Thomas Gleixner --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 87bd529879c2..3bb01a323b2a 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -838,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) bit = find_last_bit(&mask, BITS_PER_LONG); - mask = (1 << bit) - 1; + mask = (1UL << bit) - 1; expires_limit = expires_limit & ~(mask); -- cgit v1.2.3 From 561a4fe851ccab9dd0d14989ab566f9392d9f8b5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 2 May 2014 13:30:04 -0400 Subject: tracing: Use rcu_dereference_sched() for trace event triggers As trace event triggers are now part of the mainline kernel, I added my trace event trigger tests to my test suite I run on all my kernels. Now these tests get run under different config options, and one of those options is CONFIG_PROVE_RCU, which checks under lockdep that the rcu locking primitives are being used correctly. This triggered the following splat: =============================== [ INFO: suspicious RCU usage. ] 3.15.0-rc2-test+ #11 Not tainted ------------------------------- kernel/trace/trace_events_trigger.c:80 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 4 locks held by swapper/1/0: #0: ((&(&j_cdbs->work)->timer)){..-...}, at: [] call_timer_fn+0x5/0x1be #1: (&(&pool->lock)->rlock){-.-...}, at: [] __queue_work+0x140/0x283 #2: (&p->pi_lock){-.-.-.}, at: [] try_to_wake_up+0x2e/0x1e8 #3: (&rq->lock){-.-.-.}, at: [] try_to_wake_up+0x1a0/0x1e8 stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.15.0-rc2-test+ #11 Hardware name: /DG965MQ, BIOS MQ96510J.86A.0372.2006.0605.1717 06/05/2006 0000000000000001 ffff88007e083b98 ffffffff819f53a5 0000000000000006 ffff88007b0942c0 ffff88007e083bc8 ffffffff81081307 ffff88007ad96d20 0000000000000000 ffff88007af2d840 ffff88007b2e701c ffff88007e083c18 Call Trace: [] dump_stack+0x4f/0x7c [] lockdep_rcu_suspicious+0x107/0x110 [] event_triggers_call+0x99/0x108 [] ftrace_event_buffer_commit+0x42/0xa4 [] ftrace_raw_event_sched_wakeup_template+0x71/0x7c [] ttwu_do_wakeup+0x7f/0xff [] ttwu_do_activate.constprop.126+0x5c/0x61 [] try_to_wake_up+0x1ac/0x1e8 [] wake_up_process+0x36/0x3b [] wake_up_worker+0x24/0x26 [] insert_work+0x5c/0x65 [] __queue_work+0x26c/0x283 [] ? __queue_work+0x283/0x283 [] delayed_work_timer_fn+0x1e/0x20 [] call_timer_fn+0xdf/0x1be^M [] ? call_timer_fn+0x5/0x1be [] ? __queue_work+0x283/0x283 [] run_timer_softirq+0x1a4/0x22f^M [] __do_softirq+0x17b/0x31b^M [] irq_exit+0x42/0x97 [] smp_apic_timer_interrupt+0x37/0x44 [] apic_timer_interrupt+0x6f/0x80 [] ? default_idle+0x21/0x32 [] ? default_idle+0x1f/0x32 [] arch_cpu_idle+0xf/0x11 [] cpu_startup_entry+0x1a3/0x213 [] start_secondary+0x212/0x219 The cause is that the triggers are protected by rcu_read_lock_sched() but the data is dereferenced with rcu_dereference() which expects it to be protected with rcu_read_lock(). The proper reference should be rcu_dereference_sched(). Cc: Tom Zanussi Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 925f537f07d1..4747b476a030 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -77,7 +77,7 @@ event_triggers_call(struct ftrace_event_file *file, void *rec) data->ops->func(data); continue; } - filter = rcu_dereference(data->filter); + filter = rcu_dereference_sched(data->filter); if (filter && !filter_match_preds(filter, rec)) continue; if (data->cmd_ops->post_trigger) { -- cgit v1.2.3 From 722a9f9299ca720a3f14660e7c0dce7b76a9cb42 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 2 May 2014 00:44:38 +0200 Subject: asmlinkage: Add explicit __visible to drivers/*, lib/*, kernel/* As requested by Linus add explicit __visible to the asmlinkage users. This marks functions visible to assembler. Tree sweep for rest of tree. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1398984278-29319-4-git-send-email-andi@firstfloor.org Signed-off-by: H. Peter Anvin --- drivers/pnp/pnpbios/bioscalls.c | 2 +- init/main.c | 2 +- kernel/context_tracking.c | 2 +- kernel/locking/lockdep.c | 2 +- kernel/power/snapshot.c | 2 +- kernel/printk/printk.c | 4 ++-- kernel/sched/core.c | 10 +++++----- kernel/softirq.c | 4 ++-- lib/dump_stack.c | 4 ++-- 9 files changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c index deb7f4bcdb7b..438d4c72c7b3 100644 --- a/drivers/pnp/pnpbios/bioscalls.c +++ b/drivers/pnp/pnpbios/bioscalls.c @@ -37,7 +37,7 @@ __visible struct { * kernel begins at offset 3GB... */ -asmlinkage void pnp_bios_callfunc(void); +asmlinkage __visible void pnp_bios_callfunc(void); __asm__(".text \n" __ALIGN_STR "\n" diff --git a/init/main.c b/init/main.c index 9c7fd4c9249f..48655ceb66f4 100644 --- a/init/main.c +++ b/init/main.c @@ -476,7 +476,7 @@ static void __init mm_init(void) vmalloc_init(); } -asmlinkage void __init start_kernel(void) +asmlinkage __visible void __init start_kernel(void) { char * command_line; extern const struct kernel_param __start___param[], __stop___param[]; diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 6cb20d2e7ee0..019d45008448 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -120,7 +120,7 @@ void context_tracking_user_enter(void) * instead of preempt_schedule() to exit user context if needed before * calling the scheduler. */ -asmlinkage void __sched notrace preempt_schedule_context(void) +asmlinkage __visible void __sched notrace preempt_schedule_context(void) { enum ctx_state prev_ctx; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b0e9467922e1..d24e4339b46d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4188,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) } EXPORT_SYMBOL_GPL(debug_show_held_locks); -asmlinkage void lockdep_sys_exit(void) +asmlinkage __visible void lockdep_sys_exit(void) { struct task_struct *curr = current; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 18fb7a2fb14b..1ea328aafdc9 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1586,7 +1586,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, return -ENOMEM; } -asmlinkage int swsusp_save(void) +asmlinkage __visible int swsusp_save(void) { unsigned int nr_pages, nr_highmem; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a45b50962295..7228258b85ec 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1674,7 +1674,7 @@ EXPORT_SYMBOL(printk_emit); * * See the vsnprintf() documentation for format string extensions over C99. */ -asmlinkage int printk(const char *fmt, ...) +asmlinkage __visible int printk(const char *fmt, ...) { va_list args; int r; @@ -1737,7 +1737,7 @@ void early_vprintk(const char *fmt, va_list ap) } } -asmlinkage void early_printk(const char *fmt, ...) +asmlinkage __visible void early_printk(const char *fmt, ...) { va_list ap; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 268a45ea238c..d9d8ece46a15 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2192,7 +2192,7 @@ static inline void post_schedule(struct rq *rq) * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. */ -asmlinkage void schedule_tail(struct task_struct *prev) +asmlinkage __visible void schedule_tail(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); @@ -2741,7 +2741,7 @@ static inline void sched_submit_work(struct task_struct *tsk) blk_schedule_flush_plug(tsk); } -asmlinkage void __sched schedule(void) +asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; @@ -2751,7 +2751,7 @@ asmlinkage void __sched schedule(void) EXPORT_SYMBOL(schedule); #ifdef CONFIG_CONTEXT_TRACKING -asmlinkage void __sched schedule_user(void) +asmlinkage __visible void __sched schedule_user(void) { /* * If we come here after a random call to set_need_resched(), @@ -2783,7 +2783,7 @@ void __sched schedule_preempt_disabled(void) * off of preempt_enable. Kernel preemptions off return from interrupt * occur there and call schedule directly. */ -asmlinkage void __sched notrace preempt_schedule(void) +asmlinkage __visible void __sched notrace preempt_schedule(void) { /* * If there is a non-zero preempt_count or interrupts are disabled, @@ -2813,7 +2813,7 @@ EXPORT_SYMBOL(preempt_schedule); * Note, that this is called and return with irqs disabled. This will * protect us against recursive calling from irq. */ -asmlinkage void __sched preempt_schedule_irq(void) +asmlinkage __visible void __sched preempt_schedule_irq(void) { enum ctx_state prev_state; diff --git a/kernel/softirq.c b/kernel/softirq.c index 33e4648ae0e7..92f24f5e8d52 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -223,7 +223,7 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif -asmlinkage void __do_softirq(void) +asmlinkage __visible void __do_softirq(void) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; @@ -299,7 +299,7 @@ restart: tsk_restore_flags(current, old_flags, PF_MEMALLOC); } -asmlinkage void do_softirq(void) +asmlinkage __visible void do_softirq(void) { __u32 pending; unsigned long flags; diff --git a/lib/dump_stack.c b/lib/dump_stack.c index f23b63f0a1c3..6745c6230db3 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -23,7 +23,7 @@ static void __dump_stack(void) #ifdef CONFIG_SMP static atomic_t dump_lock = ATOMIC_INIT(-1); -asmlinkage void dump_stack(void) +asmlinkage __visible void dump_stack(void) { int was_locked; int old; @@ -55,7 +55,7 @@ retry: preempt_enable(); } #else -asmlinkage void dump_stack(void) +asmlinkage __visible void dump_stack(void) { __dump_stack(); } -- cgit v1.2.3 From 8058bd0faad860e75547cc5cb5d4ade016247a79 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 8 May 2014 07:47:49 -0400 Subject: tracepoint: Fix use of tracepoint funcs after rcu free Commit de7b2973903c "tracepoint: Use struct pointer instead of name hash for reg/unreg tracepoints" introduces a use after free by calling release_probes on the old struct tracepoint array before the newly allocated array is published with rcu_assign_pointer. There is a race window where tracepoints (RCU readers) can perform a "use-after-grace-period-after-free", which shows up as a GPF in stress-tests. Link: http://lkml.kernel.org/r/53698021.5020108@oracle.com Link: http://lkml.kernel.org/p/1399549669-25465-1-git-send-email-mathieu.desnoyers@efficios.com Reported-by: Sasha Levin CC: Oleg Nesterov CC: Dave Jones Fixes: de7b2973903c "tracepoint: Use struct pointer instead of name hash for reg/unreg tracepoints" Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- kernel/tracepoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index ac5b23cf7212..6620e5837ce2 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -188,7 +188,6 @@ static int tracepoint_add_func(struct tracepoint *tp, WARN_ON_ONCE(1); return PTR_ERR(old); } - release_probes(old); /* * rcu_assign_pointer has a smp_wmb() which makes sure that the new @@ -200,6 +199,7 @@ static int tracepoint_add_func(struct tracepoint *tp, rcu_assign_pointer(tp->funcs, tp_funcs); if (!static_key_enabled(&tp->key)) static_key_slow_inc(&tp->key); + release_probes(old); return 0; } @@ -221,7 +221,6 @@ static int tracepoint_remove_func(struct tracepoint *tp, WARN_ON_ONCE(1); return PTR_ERR(old); } - release_probes(old); if (!tp_funcs) { /* Removed last function */ @@ -232,6 +231,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, static_key_slow_dec(&tp->key); } rcu_assign_pointer(tp->funcs, tp_funcs); + release_probes(old); return 0; } -- cgit v1.2.3 From 1f0b63866fc1be700260547be8edf8e6f0af37f2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 May 2014 23:29:57 +0200 Subject: ACPI / PM: Hold ACPI scan lock over the "freeze" sleep state The "freeze" sleep state suffers from the same issue that was addressed by commit ad07277e82de (ACPI / PM: Hold acpi_scan_lock over system PM transitions) for ACPI sleep states, that is, things break if ->remove() is called for devices whose system resume callbacks haven't been executed yet. It also can be addressed in the same way, by holding the ACPI scan lock over the "freeze" sleep state and PM transitions to and from that state, but ->begin() and ->end() platform operations for the "freeze" sleep state are needed for this purpose. This change has been tested on Acer Aspire S5 with Thunderbolt. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 18 ++++++++++++++++++ include/linux/suspend.h | 7 +++++++ kernel/power/suspend.c | 15 +++++++++++++++ 3 files changed, 40 insertions(+) (limited to 'kernel') diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 2281ca31c1bc..c11e3795431b 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -612,6 +612,22 @@ static const struct platform_suspend_ops acpi_suspend_ops_old = { .recover = acpi_pm_finish, }; +static int acpi_freeze_begin(void) +{ + acpi_scan_lock_acquire(); + return 0; +} + +static void acpi_freeze_end(void) +{ + acpi_scan_lock_release(); +} + +static const struct platform_freeze_ops acpi_freeze_ops = { + .begin = acpi_freeze_begin, + .end = acpi_freeze_end, +}; + static void acpi_sleep_suspend_setup(void) { int i; @@ -622,7 +638,9 @@ static void acpi_sleep_suspend_setup(void) suspend_set_ops(old_suspend_ordering ? &acpi_suspend_ops_old : &acpi_suspend_ops); + freeze_set_ops(&acpi_freeze_ops); } + #else /* !CONFIG_SUSPEND */ static inline void acpi_sleep_suspend_setup(void) {} #endif /* !CONFIG_SUSPEND */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index f73cabf59012..91d66fd8dce1 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -187,6 +187,11 @@ struct platform_suspend_ops { void (*recover)(void); }; +struct platform_freeze_ops { + int (*begin)(void); + void (*end)(void); +}; + #ifdef CONFIG_SUSPEND /** * suspend_set_ops - set platform dependent suspend operations @@ -194,6 +199,7 @@ struct platform_suspend_ops { */ extern void suspend_set_ops(const struct platform_suspend_ops *ops); extern int suspend_valid_only_mem(suspend_state_t state); +extern void freeze_set_ops(const struct platform_freeze_ops *ops); extern void freeze_wake(void); /** @@ -220,6 +226,7 @@ extern int pm_suspend(suspend_state_t state); static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } +static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {} static inline void freeze_wake(void) {} #endif /* !CONFIG_SUSPEND */ diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 8233cd4047d7..73a905f83972 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -38,6 +38,7 @@ const char *const pm_states[PM_SUSPEND_MAX] = { }; static const struct platform_suspend_ops *suspend_ops; +static const struct platform_freeze_ops *freeze_ops; static bool need_suspend_ops(suspend_state_t state) { @@ -47,6 +48,13 @@ static bool need_suspend_ops(suspend_state_t state) static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); static bool suspend_freeze_wake; +void freeze_set_ops(const struct platform_freeze_ops *ops) +{ + lock_system_sleep(); + freeze_ops = ops; + unlock_system_sleep(); +} + static void freeze_begin(void) { suspend_freeze_wake = false; @@ -269,6 +277,10 @@ int suspend_devices_and_enter(suspend_state_t state) error = suspend_ops->begin(state); if (error) goto Close; + } else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) { + error = freeze_ops->begin(); + if (error) + goto Close; } suspend_console(); suspend_test_start(); @@ -294,6 +306,9 @@ int suspend_devices_and_enter(suspend_state_t state) Close: if (need_suspend_ops(state) && suspend_ops->end) suspend_ops->end(); + else if (state == PM_SUSPEND_FREEZE && freeze_ops->end) + freeze_ops->end(); + trace_machine_suspend(PWR_EVENT_EXIT); return error; -- cgit v1.2.3