summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2017-10-10 06:11:09 +0300
committer David S. Miller <davem@davemloft.net> 2017-10-10 06:11:09 +0300
commit d93fa2ba64384a0bbee4ae7409d85fccb2cfcf14 (patch)
tree 8e55d9d74f46ce13e4299d23c23c72b9306798c1 /kernel
parent d0e60206bea2dec46c0a28fd6b116646aa67c5ae (diff)
parent 529a86e063e9ff625c4ff247d8aa17d8072444fb (diff)
download linux-d93fa2ba64384a0bbee4ae7409d85fccb2cfcf14.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/inode.c 1
-rw-r--r-- kernel/bpf/verifier.c 5
-rw-r--r-- kernel/cpu.c 6
-rw-r--r-- kernel/smpboot.c 25
-rw-r--r-- kernel/sysctl.c 22
-rw-r--r-- kernel/watchdog.c 643
-rw-r--r-- kernel/watchdog_hld.c 196
7 files changed, 378 insertions, 520 deletions
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index e833ed914358..be1dde967208 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -363,6 +363,7 @@ out:
putname(pname);
return ret;
}
+EXPORT_SYMBOL_GPL(bpf_obj_get_user);
static void bpf_evict_inode(struct inode *inode)
{
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 590125e29161..6352a88ca6d1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -704,6 +704,10 @@ static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
{
struct bpf_verifier_state *parent = state->parent;
+ if (regno == BPF_REG_FP)
+ /* We don't need to worry about FP liveness because it's read-only */
+ return;
+
while (parent) {
/* if read wasn't screened by an earlier write ... */
if (state->regs[regno].live & REG_LIVE_WRITTEN)
@@ -2402,6 +2406,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
* copy register state to dest reg
*/
regs[insn->dst_reg] = regs[insn->src_reg];
+ regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
} else {
/* R1 = (u32) R2 */
if (is_pointer_value(env, insn->src_reg)) {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8de11a29e495..d851df22f5c5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -24,6 +24,7 @@
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
+#include <linux/nmi.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
@@ -897,6 +898,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
out:
cpus_write_unlock();
+ /*
+ * Do post unplug cleanup. This is still protected against
+ * concurrent CPU hotplug via cpu_add_remove_lock.
+ */
+ lockup_detector_cleanup();
return ret;
}
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 1d71c051a951..5043e7433f4b 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -344,39 +344,30 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
* by the client, but only by calling this function.
* This function can only be called on a registered smp_hotplug_thread.
*/
-int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
- const struct cpumask *new)
+void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
+ const struct cpumask *new)
{
struct cpumask *old = plug_thread->cpumask;
- cpumask_var_t tmp;
+ static struct cpumask tmp;
unsigned int cpu;
- if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
- return -ENOMEM;
-
- get_online_cpus();
+ lockdep_assert_cpus_held();
mutex_lock(&smpboot_threads_lock);
/* Park threads that were exclusively enabled on the old mask. */
- cpumask_andnot(tmp, old, new);
- for_each_cpu_and(cpu, tmp, cpu_online_mask)
+ cpumask_andnot(&tmp, old, new);
+ for_each_cpu_and(cpu, &tmp, cpu_online_mask)
smpboot_park_thread(plug_thread, cpu);
/* Unpark threads that are exclusively enabled on the new mask. */
- cpumask_andnot(tmp, new, old);
- for_each_cpu_and(cpu, tmp, cpu_online_mask)
+ cpumask_andnot(&tmp, new, old);
+ for_each_cpu_and(cpu, &tmp, cpu_online_mask)
smpboot_unpark_thread(plug_thread, cpu);
cpumask_copy(old, new);
mutex_unlock(&smpboot_threads_lock);
- put_online_cpus();
-
- free_cpumask_var(tmp);
-
- return 0;
}
-EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread);
static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4da9e622471f..d9c31bc2eaea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -872,9 +872,9 @@ static struct ctl_table kern_table[] = {
#if defined(CONFIG_LOCKUP_DETECTOR)
{
.procname = "watchdog",
- .data = &watchdog_user_enabled,
- .maxlen = sizeof (int),
- .mode = 0644,
+ .data = &watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = proc_watchdog,
.extra1 = &zero,
.extra2 = &one,
@@ -890,16 +890,12 @@ static struct ctl_table kern_table[] = {
},
{
.procname = "nmi_watchdog",
- .data = &nmi_watchdog_enabled,
- .maxlen = sizeof (int),
- .mode = 0644,
+ .data = &nmi_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = NMI_WATCHDOG_SYSCTL_PERM,
.proc_handler = proc_nmi_watchdog,
.extra1 = &zero,
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
.extra2 = &one,
-#else
- .extra2 = &zero,
-#endif
},
{
.procname = "watchdog_cpumask",
@@ -911,9 +907,9 @@ static struct ctl_table kern_table[] = {
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
{
.procname = "soft_watchdog",
- .data = &soft_watchdog_enabled,
- .maxlen = sizeof (int),
- .mode = 0644,
+ .data = &soft_watchdog_user_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = proc_soft_watchdog,
.extra1 = &zero,
.extra2 = &one,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f5d52024f6b7..6bcb854909c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,20 +29,29 @@
#include <linux/kvm_para.h>
#include <linux/kthread.h>
-/* Watchdog configuration */
-static DEFINE_MUTEX(watchdog_proc_mutex);
-
-int __read_mostly nmi_watchdog_enabled;
+static DEFINE_MUTEX(watchdog_mutex);
#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
- NMI_WATCHDOG_ENABLED;
+# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
+# define NMI_WATCHDOG_DEFAULT 1
#else
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED)
+# define NMI_WATCHDOG_DEFAULT 0
#endif
+unsigned long __read_mostly watchdog_enabled;
+int __read_mostly watchdog_user_enabled = 1;
+int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
+int __read_mostly soft_watchdog_user_enabled = 1;
+int __read_mostly watchdog_thresh = 10;
+int __read_mostly nmi_watchdog_available;
+
+struct cpumask watchdog_allowed_mask __read_mostly;
+
+struct cpumask watchdog_cpumask __read_mostly;
+unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
+
#ifdef CONFIG_HARDLOCKUP_DETECTOR
-/* boot commands */
/*
* Should we panic when a soft-lockup or hard-lockup occurs:
*/
@@ -56,9 +65,9 @@ unsigned int __read_mostly hardlockup_panic =
* kernel command line parameters are parsed, because otherwise it is not
* possible to override this in hardlockup_panic_setup().
*/
-void hardlockup_detector_disable(void)
+void __init hardlockup_detector_disable(void)
{
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+ nmi_watchdog_user_enabled = 0;
}
static int __init hardlockup_panic_setup(char *str)
@@ -68,48 +77,24 @@ static int __init hardlockup_panic_setup(char *str)
else if (!strncmp(str, "nopanic", 7))
hardlockup_panic = 0;
else if (!strncmp(str, "0", 1))
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+ nmi_watchdog_user_enabled = 0;
else if (!strncmp(str, "1", 1))
- watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+ nmi_watchdog_user_enabled = 1;
return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
-#endif
-
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-int __read_mostly soft_watchdog_enabled;
-#endif
-
-int __read_mostly watchdog_user_enabled;
-int __read_mostly watchdog_thresh = 10;
-
-#ifdef CONFIG_SMP
-int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#endif
-struct cpumask watchdog_cpumask __read_mostly;
-unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
-/*
- * The 'watchdog_running' variable is set to 1 when the watchdog threads
- * are registered/started and is set to 0 when the watchdog threads are
- * unregistered/stopped, so it is an indicator whether the threads exist.
- */
-static int __read_mostly watchdog_running;
-/*
- * If a subsystem has a need to deactivate the watchdog temporarily, it
- * can use the suspend/resume interface to achieve this. The content of
- * the 'watchdog_suspended' variable reflects this state. Existing threads
- * are parked/unparked by the lockup_detector_{suspend|resume} functions
- * (see comment blocks pertaining to those functions for further details).
- *
- * 'watchdog_suspended' also prevents threads from being registered/started
- * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
- * of 'watchdog_running' cannot change while the watchdog is deactivated
- * temporarily (see related code in 'proc' handlers).
- */
-int __read_mostly watchdog_suspended;
+static int __init hardlockup_all_cpu_backtrace_setup(char *str)
+{
+ sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
+ return 1;
+}
+__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
+# endif /* CONFIG_SMP */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
/*
* These functions can be overridden if an architecture implements its
@@ -121,36 +106,68 @@ int __read_mostly watchdog_suspended;
*/
int __weak watchdog_nmi_enable(unsigned int cpu)
{
+ hardlockup_detector_perf_enable();
return 0;
}
+
void __weak watchdog_nmi_disable(unsigned int cpu)
{
+ hardlockup_detector_perf_disable();
}
-/*
- * watchdog_nmi_reconfigure can be implemented to be notified after any
- * watchdog configuration change. The arch hardlockup watchdog should
- * respond to the following variables:
- * - nmi_watchdog_enabled
+/* Return 0 if an NMI watchdog is available, an error code otherwise. */
+int __weak __init watchdog_nmi_probe(void)
+{
+ return hardlockup_detector_perf_init();
+}
+
+/**
+ * watchdog_nmi_stop - Stop the watchdog for reconfiguration
+ *
+ * The reconfiguration steps are:
+ * watchdog_nmi_stop();
+ * update_variables();
+ * watchdog_nmi_start();
+ */
+void __weak watchdog_nmi_stop(void) { }
+
+/**
+ * watchdog_nmi_start - Start the watchdog after reconfiguration
+ *
+ * Counterpart to watchdog_nmi_stop().
+ *
+ * The following variables have been updated in update_variables() and
+ * contain the currently valid configuration:
+ * - watchdog_enabled
* - watchdog_thresh
* - watchdog_cpumask
- * - sysctl_hardlockup_all_cpu_backtrace
- * - hardlockup_panic
- * - watchdog_suspended
*/
-void __weak watchdog_nmi_reconfigure(void)
+void __weak watchdog_nmi_start(void) { }
+
+/**
+ * lockup_detector_update_enable - Update the sysctl enable bit
+ *
+ * Caller needs to make sure that the NMI/perf watchdogs are off, so this
+ * can't race with watchdog_nmi_disable().
+ */
+static void lockup_detector_update_enable(void)
{
+ watchdog_enabled = 0;
+ if (!watchdog_user_enabled)
+ return;
+ if (nmi_watchdog_available && nmi_watchdog_user_enabled)
+ watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+ if (soft_watchdog_user_enabled)
+ watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
}
-
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
- for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
+/* Global variables, exported for sysctl */
+unsigned int __read_mostly softlockup_panic =
+ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+static bool softlockup_threads_initialized __read_mostly;
static u64 __read_mostly sample_period;
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -164,50 +181,40 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static unsigned long soft_lockup_nmi_warn;
-unsigned int __read_mostly softlockup_panic =
- CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
-
static int __init softlockup_panic_setup(char *str)
{
softlockup_panic = simple_strtoul(str, NULL, 0);
-
return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);
static int __init nowatchdog_setup(char *str)
{
- watchdog_enabled = 0;
+ watchdog_user_enabled = 0;
return 1;
}
__setup("nowatchdog", nowatchdog_setup);
static int __init nosoftlockup_setup(char *str)
{
- watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
+ soft_watchdog_user_enabled = 0;
return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
- sysctl_softlockup_all_cpu_backtrace =
- !!simple_strtol(str, NULL, 0);
+ sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int __init hardlockup_all_cpu_backtrace_setup(char *str)
-{
- sysctl_hardlockup_all_cpu_backtrace =
- !!simple_strtol(str, NULL, 0);
- return 1;
-}
-__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
-#endif
#endif
+static void __lockup_detector_cleanup(void);
+
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
* lockups can have false positives under extreme conditions. So we generally
@@ -278,11 +285,15 @@ void touch_all_softlockup_watchdogs(void)
int cpu;
/*
- * this is done lockless
- * do we care if a 0 races with a timestamp?
- * all it means is the softlock check starts one cycle later
+ * watchdog_mutex cannot be taken here, as this might be called
+ * from (soft)interrupt context, so the access to
+ * watchdog_allowed_mask might race with a concurrent update.
+ *
+ * The watchdog time stamp can race against a concurrent real
+ * update as well, the only side effect might be a cycle delay for
+ * the softlockup check.
*/
- for_each_watchdog_cpu(cpu)
+ for_each_cpu(cpu, &watchdog_allowed_mask)
per_cpu(watchdog_touch_ts, cpu) = 0;
wq_watchdog_touch(-1);
}
@@ -322,9 +333,6 @@ static void watchdog_interrupt_count(void)
__this_cpu_inc(hrtimer_interrupts);
}
-static int watchdog_enable_all_cpus(void);
-static void watchdog_disable_all_cpus(void);
-
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
@@ -333,7 +341,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
int duration;
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
- if (atomic_read(&watchdog_park_in_progress) != 0)
+ if (!watchdog_enabled)
return HRTIMER_NORESTART;
/* kick the hardlockup detector */
@@ -447,32 +455,38 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio)
static void watchdog_enable(unsigned int cpu)
{
- struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
+ struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
- /* kick off the timer for the hardlockup detector */
+ /*
+ * Start the timer first to prevent the NMI watchdog triggering
+ * before the timer has a chance to fire.
+ */
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
-
- /* Enable the perf event */
- watchdog_nmi_enable(cpu);
-
- /* done here because hrtimer_start can only pin to smp_processor_id() */
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
- /* initialize timestamp */
- watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
+ /* Initialize timestamp */
__touch_watchdog();
+ /* Enable the perf event */
+ if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
+ watchdog_nmi_enable(cpu);
+
+ watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
}
static void watchdog_disable(unsigned int cpu)
{
- struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
+ struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
watchdog_set_prio(SCHED_NORMAL, 0);
- hrtimer_cancel(hrtimer);
- /* disable the perf event */
+ /*
+ * Disable the perf event first. Otherwise a large delay between
+ * disabling the timer and disabling the perf event could cause
+ * the perf NMI to detect a false positive.
+ */
watchdog_nmi_disable(cpu);
+ hrtimer_cancel(hrtimer);
}
static void watchdog_cleanup(unsigned int cpu, bool online)
@@ -499,21 +513,6 @@ static void watchdog(unsigned int cpu)
__this_cpu_write(soft_lockup_hrtimer_cnt,
__this_cpu_read(hrtimer_interrupts));
__touch_watchdog();
-
- /*
- * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
- * failure path. Check for failures that can occur asynchronously -
- * for example, when CPUs are on-lined - and shut down the hardware
- * perf event on each CPU accordingly.
- *
- * The only non-obvious place this bit can be cleared is through
- * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
- * pr_info here would be too noisy as it would result in a message
- * every few seconds if the hardlockup was disabled but the softlockup
- * enabled.
- */
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- watchdog_nmi_disable(cpu);
}
static struct smp_hotplug_thread watchdog_threads = {
@@ -527,295 +526,174 @@ static struct smp_hotplug_thread watchdog_threads = {
.unpark = watchdog_enable,
};
-/*
- * park all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function returns an error if kthread_park() of a watchdog thread
- * fails. In this situation, the watchdog threads of some CPUs can already
- * be parked and the watchdog threads of other CPUs can still be runnable.
- * Callers are expected to handle this special condition as appropriate in
- * their context.
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static int watchdog_park_threads(void)
+static void softlockup_update_smpboot_threads(void)
{
- int cpu, ret = 0;
+ lockdep_assert_held(&watchdog_mutex);
- atomic_set(&watchdog_park_in_progress, 1);
+ if (!softlockup_threads_initialized)
+ return;
- for_each_watchdog_cpu(cpu) {
- ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
- if (ret)
- break;
- }
-
- atomic_set(&watchdog_park_in_progress, 0);
-
- return ret;
+ smpboot_update_cpumask_percpu_thread(&watchdog_threads,
+ &watchdog_allowed_mask);
}
-/*
- * unpark all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static void watchdog_unpark_threads(void)
+/* Temporarily park all watchdog threads */
+static void softlockup_park_all_threads(void)
{
- int cpu;
-
- for_each_watchdog_cpu(cpu)
- kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+ cpumask_clear(&watchdog_allowed_mask);
+ softlockup_update_smpboot_threads();
}
-static int update_watchdog_all_cpus(void)
+/* Unpark enabled threads */
+static void softlockup_unpark_threads(void)
{
- int ret;
-
- ret = watchdog_park_threads();
- if (ret)
- return ret;
-
- watchdog_unpark_threads();
-
- return 0;
+ cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
+ softlockup_update_smpboot_threads();
}
-static int watchdog_enable_all_cpus(void)
+static void lockup_detector_reconfigure(void)
{
- int err = 0;
-
- if (!watchdog_running) {
- err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
- &watchdog_cpumask);
- if (err)
- pr_err("Failed to create watchdog threads, disabled\n");
- else
- watchdog_running = 1;
- } else {
- /*
- * Enable/disable the lockup detectors or
- * change the sample period 'on the fly'.
- */
- err = update_watchdog_all_cpus();
-
- if (err) {
- watchdog_disable_all_cpus();
- pr_err("Failed to update lockup detectors, disabled\n");
- }
- }
-
- if (err)
- watchdog_enabled = 0;
-
- return err;
+ cpus_read_lock();
+ watchdog_nmi_stop();
+ softlockup_park_all_threads();
+ set_sample_period();
+ lockup_detector_update_enable();
+ if (watchdog_enabled && watchdog_thresh)
+ softlockup_unpark_threads();
+ watchdog_nmi_start();
+ cpus_read_unlock();
+ /*
+ * Must be called outside the cpus locked section to prevent
+ * recursive locking in the perf code.
+ */
+ __lockup_detector_cleanup();
}
-static void watchdog_disable_all_cpus(void)
+/*
+ * Create the watchdog thread infrastructure and configure the detector(s).
+ *
+ * The threads are not unparked as watchdog_allowed_mask is empty. When
+ * the threads are successfully initialized, take the proper locks and
+ * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
+ */
+static __init void lockup_detector_setup(void)
{
- if (watchdog_running) {
- watchdog_running = 0;
- smpboot_unregister_percpu_thread(&watchdog_threads);
- }
-}
+ int ret;
-#ifdef CONFIG_SYSCTL
-static int watchdog_update_cpus(void)
-{
- return smpboot_update_cpumask_percpu_thread(
- &watchdog_threads, &watchdog_cpumask);
-}
-#endif
+ /*
+ * If sysctl is off and watchdog got disabled on the command line,
+ * nothing to do here.
+ */
+ lockup_detector_update_enable();
-#else /* SOFTLOCKUP */
-static int watchdog_park_threads(void)
-{
- return 0;
-}
+ if (!IS_ENABLED(CONFIG_SYSCTL) &&
+ !(watchdog_enabled && watchdog_thresh))
+ return;
-static void watchdog_unpark_threads(void)
-{
-}
+ ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+ &watchdog_allowed_mask);
+ if (ret) {
+ pr_err("Failed to initialize soft lockup detector threads\n");
+ return;
+ }
-static int watchdog_enable_all_cpus(void)
-{
- return 0;
+ mutex_lock(&watchdog_mutex);
+ softlockup_threads_initialized = true;
+ lockup_detector_reconfigure();
+ mutex_unlock(&watchdog_mutex);
}
-static void watchdog_disable_all_cpus(void)
+#else /* CONFIG_SOFTLOCKUP_DETECTOR */
+static inline int watchdog_park_threads(void) { return 0; }
+static inline void watchdog_unpark_threads(void) { }
+static inline int watchdog_enable_all_cpus(void) { return 0; }
+static inline void watchdog_disable_all_cpus(void) { }
+static void lockup_detector_reconfigure(void)
{
+ cpus_read_lock();
+ watchdog_nmi_stop();
+ lockup_detector_update_enable();
+ watchdog_nmi_start();
+ cpus_read_unlock();
}
-
-#ifdef CONFIG_SYSCTL
-static int watchdog_update_cpus(void)
+static inline void lockup_detector_setup(void)
{
- return 0;
+ lockup_detector_reconfigure();
}
-#endif
+#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
-static void set_sample_period(void)
+static void __lockup_detector_cleanup(void)
{
+ lockdep_assert_held(&watchdog_mutex);
+ hardlockup_detector_perf_cleanup();
}
-#endif /* SOFTLOCKUP */
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
+/**
+ * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
+ *
+ * Caller must not hold the cpu hotplug rwsem.
*/
-int lockup_detector_suspend(void)
+void lockup_detector_cleanup(void)
{
- int ret = 0;
-
- get_online_cpus();
- mutex_lock(&watchdog_proc_mutex);
- /*
- * Multiple suspend requests can be active in parallel (counted by
- * the 'watchdog_suspended' variable). If the watchdog threads are
- * running, the first caller takes care that they will be parked.
- * The state of 'watchdog_running' cannot change while a suspend
- * request is active (see related code in 'proc' handlers).
- */
- if (watchdog_running && !watchdog_suspended)
- ret = watchdog_park_threads();
-
- if (ret == 0)
- watchdog_suspended++;
- else {
- watchdog_disable_all_cpus();
- pr_err("Failed to suspend lockup detectors, disabled\n");
- watchdog_enabled = 0;
- }
-
- watchdog_nmi_reconfigure();
-
- mutex_unlock(&watchdog_proc_mutex);
-
- return ret;
+ mutex_lock(&watchdog_mutex);
+ __lockup_detector_cleanup();
+ mutex_unlock(&watchdog_mutex);
}
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
+/**
+ * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
+ *
+ * Special interface for parisc. It prevents lockup detector warnings from
+ * the default pm_poweroff() function which busy loops forever.
*/
-void lockup_detector_resume(void)
+void lockup_detector_soft_poweroff(void)
{
- mutex_lock(&watchdog_proc_mutex);
-
- watchdog_suspended--;
- /*
- * The watchdog threads are unparked if they were previously running
- * and if there is no more active suspend request.
- */
- if (watchdog_running && !watchdog_suspended)
- watchdog_unpark_threads();
-
- watchdog_nmi_reconfigure();
-
- mutex_unlock(&watchdog_proc_mutex);
- put_online_cpus();
+ watchdog_enabled = 0;
}
#ifdef CONFIG_SYSCTL
-/*
- * Update the run state of the lockup detectors.
- */
-static int proc_watchdog_update(void)
+/* Propagate any changes to the watchdog threads */
+static void proc_watchdog_update(void)
{
- int err = 0;
-
- /*
- * Watchdog threads won't be started if they are already active.
- * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
- * care of this. If those threads are already active, the sample
- * period will be updated and the lockup detectors will be enabled
- * or disabled 'on the fly'.
- */
- if (watchdog_enabled && watchdog_thresh)
- err = watchdog_enable_all_cpus();
- else
- watchdog_disable_all_cpus();
-
- watchdog_nmi_reconfigure();
-
- return err;
-
+ /* Remove impossible cpus to keep sysctl output clean. */
+ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
+ lockup_detector_reconfigure();
}
/*
* common function for watchdog, nmi_watchdog and soft_watchdog parameter
*
- * caller | table->data points to | 'which' contains the flag(s)
- * -------------------|-----------------------|-----------------------------
- * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
- * | | with SOFT_WATCHDOG_ENABLED
- * -------------------|-----------------------|-----------------------------
- * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED
- * -------------------|-----------------------|-----------------------------
- * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
+ * caller | table->data points to | 'which'
+ * -------------------|----------------------------|--------------------------
+ * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED |
+ * | | SOFT_WATCHDOG_ENABLED
+ * -------------------|----------------------------|--------------------------
+ * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED
+ * -------------------|----------------------------|--------------------------
+ * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
*/
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int err, old, new;
- int *watchdog_param = (int *)table->data;
+ int err, old, *param = table->data;
- get_online_cpus();
- mutex_lock(&watchdog_proc_mutex);
+ mutex_lock(&watchdog_mutex);
- if (watchdog_suspended) {
- /* no parameter changes allowed while watchdog is suspended */
- err = -EAGAIN;
- goto out;
- }
-
- /*
- * If the parameter is being read return the state of the corresponding
- * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
- * run state of the lockup detectors.
- */
if (!write) {
- *watchdog_param = (watchdog_enabled & which) != 0;
+ /*
+ * On read synchronize the userspace interface. This is a
+ * racy snapshot.
+ */
+ *param = (watchdog_enabled & which) != 0;
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
} else {
+ old = READ_ONCE(*param);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (err)
- goto out;
-
- /*
- * There is a race window between fetching the current value
- * from 'watchdog_enabled' and storing the new value. During
- * this race window, watchdog_nmi_enable() can sneak in and
- * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
- * The 'cmpxchg' detects this race and the loop retries.
- */
- do {
- old = watchdog_enabled;
- /*
- * If the parameter value is not zero set the
- * corresponding bit(s), else clear it(them).
- */
- if (*watchdog_param)
- new = old | which;
- else
- new = old & ~which;
- } while (cmpxchg(&watchdog_enabled, old, new) != old);
-
- /*
- * Update the run state of the lockup detectors. There is _no_
- * need to check the value returned by proc_watchdog_update()
- * and to restore the previous value of 'watchdog_enabled' as
- * both lockup detectors are disabled if proc_watchdog_update()
- * returns an error.
- */
- if (old == new)
- goto out;
-
- err = proc_watchdog_update();
+ if (!err && old != READ_ONCE(*param))
+ proc_watchdog_update();
}
-out:
- mutex_unlock(&watchdog_proc_mutex);
- put_online_cpus();
+ mutex_unlock(&watchdog_mutex);
return err;
}
@@ -835,6 +713,8 @@ int proc_watchdog(struct ctl_table *table, int write,
int proc_nmi_watchdog(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ if (!nmi_watchdog_available && write)
+ return -ENOTSUPP;
return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
table, write, buffer, lenp, ppos);
}
@@ -855,39 +735,17 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
int proc_watchdog_thresh(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int err, old, new;
-
- get_online_cpus();
- mutex_lock(&watchdog_proc_mutex);
+ int err, old;
- if (watchdog_suspended) {
- /* no parameter changes allowed while watchdog is suspended */
- err = -EAGAIN;
- goto out;
- }
+ mutex_lock(&watchdog_mutex);
- old = ACCESS_ONCE(watchdog_thresh);
+ old = READ_ONCE(watchdog_thresh);
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (err || !write)
- goto out;
-
- /*
- * Update the sample period. Restore on failure.
- */
- new = ACCESS_ONCE(watchdog_thresh);
- if (old == new)
- goto out;
+ if (!err && write && old != READ_ONCE(watchdog_thresh))
+ proc_watchdog_update();
- set_sample_period();
- err = proc_watchdog_update();
- if (err) {
- watchdog_thresh = old;
- set_sample_period();
- }
-out:
- mutex_unlock(&watchdog_proc_mutex);
- put_online_cpus();
+ mutex_unlock(&watchdog_mutex);
return err;
}
@@ -902,45 +760,19 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
{
int err;
- get_online_cpus();
- mutex_lock(&watchdog_proc_mutex);
-
- if (watchdog_suspended) {
- /* no parameter changes allowed while watchdog is suspended */
- err = -EAGAIN;
- goto out;
- }
+ mutex_lock(&watchdog_mutex);
err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
- if (!err && write) {
- /* Remove impossible cpus to keep sysctl output cleaner. */
- cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
- cpu_possible_mask);
-
- if (watchdog_running) {
- /*
- * Failure would be due to being unable to allocate
- * a temporary cpumask, so we are likely not in a
- * position to do much else to make things better.
- */
- if (watchdog_update_cpus() != 0)
- pr_err("cpumask update failed\n");
- }
+ if (!err && write)
+ proc_watchdog_update();
- watchdog_nmi_reconfigure();
- }
-out:
- mutex_unlock(&watchdog_proc_mutex);
- put_online_cpus();
+ mutex_unlock(&watchdog_mutex);
return err;
}
-
#endif /* CONFIG_SYSCTL */
void __init lockup_detector_init(void)
{
- set_sample_period();
-
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_enabled()) {
pr_info("Disabling watchdog on nohz_full cores by default\n");
@@ -951,6 +783,7 @@ void __init lockup_detector_init(void)
cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
#endif
- if (watchdog_enabled)
- watchdog_enable_all_cpus();
+ if (!watchdog_nmi_probe())
+ nmi_watchdog_available = true;
+ lockup_detector_setup();
}
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 3a09ea1b1d3d..71a62ceacdc8 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -21,8 +21,10 @@
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+static struct cpumask dead_events_mask;
static unsigned long hardlockup_allcpu_dumped;
+static unsigned int watchdog_cpus;
void arch_touch_nmi_watchdog(void)
{
@@ -103,15 +105,12 @@ static struct perf_event_attr wd_hw_attr = {
/* Callback function for perf event subsystem */
static void watchdog_overflow_callback(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
- if (atomic_read(&watchdog_park_in_progress) != 0)
- return;
-
if (__this_cpu_read(watchdog_nmi_touch) == true) {
__this_cpu_write(watchdog_nmi_touch, false);
return;
@@ -160,104 +159,131 @@ static void watchdog_overflow_callback(struct perf_event *event,
return;
}
-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long firstcpu_err;
-static atomic_t watchdog_cpus;
-
-int watchdog_nmi_enable(unsigned int cpu)
+static int hardlockup_detector_event_create(void)
{
+ unsigned int cpu = smp_processor_id();
struct perf_event_attr *wd_attr;
- struct perf_event *event = per_cpu(watchdog_ev, cpu);
- int firstcpu = 0;
-
- /* nothing to do if the hard lockup detector is disabled */
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- goto out;
-
- /* is it already setup and enabled? */
- if (event && event->state > PERF_EVENT_STATE_OFF)
- goto out;
-
- /* it is setup but not enabled */
- if (event != NULL)
- goto out_enable;
-
- if (atomic_inc_return(&watchdog_cpus) == 1)
- firstcpu = 1;
+ struct perf_event *evt;
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
/* Try to register using hardware perf events */
- event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+ evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
+ watchdog_overflow_callback, NULL);
+ if (IS_ERR(evt)) {
+ pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
+ PTR_ERR(evt));
+ return PTR_ERR(evt);
+ }
+ this_cpu_write(watchdog_ev, evt);
+ return 0;
+}
- /* save the first cpu's error for future comparision */
- if (firstcpu && IS_ERR(event))
- firstcpu_err = PTR_ERR(event);
+/**
+ * hardlockup_detector_perf_enable - Enable the local event
+ */
+void hardlockup_detector_perf_enable(void)
+{
+ if (hardlockup_detector_event_create())
+ return;
- if (!IS_ERR(event)) {
- /* only print for the first cpu initialized */
- if (firstcpu || firstcpu_err)
- pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
- goto out_save;
- }
+ if (!watchdog_cpus++)
+ pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");
- /*
- * Disable the hard lockup detector if _any_ CPU fails to set up
- * set up the hardware perf event. The watchdog() function checks
- * the NMI_WATCHDOG_ENABLED bit periodically.
- *
- * The barriers are for syncing up watchdog_enabled across all the
- * cpus, as clear_bit() does not use barriers.
- */
- smp_mb__before_atomic();
- clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
- smp_mb__after_atomic();
-
- /* skip displaying the same error again */
- if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
- return PTR_ERR(event);
-
- /* vary the KERN level based on the returned errno */
- if (PTR_ERR(event) == -EOPNOTSUPP)
- pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
- else if (PTR_ERR(event) == -ENOENT)
- pr_warn("disabled (cpu%i): hardware events not enabled\n",
- cpu);
- else
- pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
- cpu, PTR_ERR(event));
-
- pr_info("Shutting down hard lockup detector on all cpus\n");
-
- return PTR_ERR(event);
-
- /* success path */
-out_save:
- per_cpu(watchdog_ev, cpu) = event;
-out_enable:
- perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
- return 0;
+ perf_event_enable(this_cpu_read(watchdog_ev));
}
-void watchdog_nmi_disable(unsigned int cpu)
+/**
+ * hardlockup_detector_perf_disable - Disable the local event
+ */
+void hardlockup_detector_perf_disable(void)
{
- struct perf_event *event = per_cpu(watchdog_ev, cpu);
+ struct perf_event *event = this_cpu_read(watchdog_ev);
if (event) {
perf_event_disable(event);
+ cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
+ watchdog_cpus--;
+ }
+}
+
+/**
+ * hardlockup_detector_perf_cleanup - Clean up disabled events and destroy them
+ *
+ * Called from lockup_detector_cleanup(). Serialized by the caller.
+ */
+void hardlockup_detector_perf_cleanup(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu, &dead_events_mask) {
+ struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+ /*
+ * Required because for_each_cpu() unconditionally reports
+ * CPU0 as set on UP kernels. Sigh.
+ */
+ if (event)
+ perf_event_release_kernel(event);
per_cpu(watchdog_ev, cpu) = NULL;
+ }
+ cpumask_clear(&dead_events_mask);
+}
+
+/**
+ * hardlockup_detector_perf_stop - Globally stop watchdog events
+ *
+ * Special interface for x86 to handle the perf HT bug.
+ */
+void __init hardlockup_detector_perf_stop(void)
+{
+ int cpu;
+
+ lockdep_assert_cpus_held();
+
+ for_each_online_cpu(cpu) {
+ struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+ if (event)
+ perf_event_disable(event);
+ }
+}
- /* should be in cleanup, but blocks oprofile */
- perf_event_release_kernel(event);
+/**
+ * hardlockup_detector_perf_restart - Globally restart watchdog events
+ *
+ * Special interface for x86 to handle the perf HT bug.
+ */
+void __init hardlockup_detector_perf_restart(void)
+{
+ int cpu;
+
+ lockdep_assert_cpus_held();
+
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ return;
+
+ for_each_online_cpu(cpu) {
+ struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+ if (event)
+ perf_event_enable(event);
+ }
+}
+
+/**
+ * hardlockup_detector_perf_init - Probe whether NMI event is available at all
+ */
+int __init hardlockup_detector_perf_init(void)
+{
+ int ret = hardlockup_detector_event_create();
- /* watchdog_nmi_enable() expects this to be zero initially. */
- if (atomic_dec_and_test(&watchdog_cpus))
- firstcpu_err = 0;
+ if (ret) {
+ pr_info("Perf NMI watchdog permanently disabled\n");
+ } else {
+ perf_event_release_kernel(this_cpu_read(watchdog_ev));
+ this_cpu_write(watchdog_ev, NULL);
}
+ return ret;
}