summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile4
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/kernel/apic/apic.c6
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c55
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c1
-rw-r--r--arch/x86/kernel/ftrace.c1
-rw-r--r--block/blk-core.c1
-rw-r--r--drivers/acpi/acpica/hwvalid.c1
-rw-r--r--drivers/acpi/battery.c2
-rw-r--r--drivers/acpi/proc.c13
-rw-r--r--drivers/acpi/processor_idle.c3
-rw-r--r--drivers/acpi/scan.c31
-rw-r--r--drivers/acpi/sleep.h3
-rw-r--r--drivers/acpi/thermal.c41
-rw-r--r--drivers/acpi/wakeup.c30
-rw-r--r--drivers/platform/x86/panasonic-laptop.c2
-rw-r--r--fs/nfs/file.c2
-rw-r--r--include/acpi/acpi_bus.h1
-rw-r--r--include/linux/compiler.h3
-rw-r--r--include/linux/ftrace.h8
-rw-r--r--include/linux/hardirq.h2
-rw-r--r--include/linux/init_task.h2
-rw-r--r--include/linux/interrupt.h75
-rw-r--r--include/linux/irq.h5
-rw-r--r--include/linux/irqreturn.h2
-rw-r--r--include/linux/sched.h23
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/hung_task.c217
-rw-r--r--kernel/irq/devres.c16
-rw-r--r--kernel/irq/handle.c50
-rw-r--r--kernel/irq/manage.c189
-rw-r--r--kernel/softlockup.c100
-rw-r--r--kernel/sysctl.c15
-rw-r--r--kernel/trace/blktrace.c7
-rw-r--r--kernel/trace/trace.c21
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_export.c2
-rw-r--r--kernel/trace/trace_output.c2
-rw-r--r--kernel/trace/trace_sched_switch.c3
-rw-r--r--kernel/trace/trace_sched_wakeup.c8
-rw-r--r--lib/Kconfig.debug38
-rw-r--r--scripts/tracing/power.pl (renamed from scripts/trace/power.pl)0
45 files changed, 750 insertions, 250 deletions
diff --git a/Makefile b/Makefile
index c6307b6d069f..e5ad5fd96177 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
-SUBLEVEL = 29
-EXTRAVERSION =
+SUBLEVEL = 30
+EXTRAVERSION = -rc1
NAME = Temporary Tasmanian Devil
# *DOCUMENTATION*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0beba0d1468d..bb83b1c397aa 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -154,6 +154,7 @@
* CPUID levels like 0x6, 0xA etc
*/
#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */
/* Virtualization flags: Linux defined */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 098ec84b8c00..f2870920f246 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -431,6 +431,12 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
+ /* Make LAPIC timer preferrable over percpu HPET */
+ lapic_clockevent.rating = 150;
+ }
+
memcpy(levt, &lapic_clockevent, sizeof(*levt));
levt->cpumask = cpumask_of(smp_processor_id());
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 8220ae69849d..c965e5212714 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+ { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
{ 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 19f6b9d27e83..9d3af380c6bd 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -68,6 +68,7 @@ struct acpi_cpufreq_data {
unsigned int max_freq;
unsigned int resume;
unsigned int cpu_feature;
+ u64 saved_aperf, saved_mperf;
};
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
@@ -241,26 +242,23 @@ static u32 get_cur_val(const struct cpumask *mask)
return cmd.val;
}
-struct perf_cur {
+struct perf_pair {
union {
struct {
u32 lo;
u32 hi;
} split;
u64 whole;
- } aperf_cur, mperf_cur;
+ } aperf, mperf;
};
static long read_measured_perf_ctrs(void *_cur)
{
- struct perf_cur *cur = _cur;
+ struct perf_pair *cur = _cur;
- rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi);
- rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi);
-
- wrmsr(MSR_IA32_APERF, 0, 0);
- wrmsr(MSR_IA32_MPERF, 0, 0);
+ rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
+ rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
return 0;
}
@@ -281,52 +279,57 @@ static long read_measured_perf_ctrs(void *_cur)
static unsigned int get_measured_perf(struct cpufreq_policy *policy,
unsigned int cpu)
{
- struct perf_cur cur;
+ struct perf_pair readin, cur;
unsigned int perf_percent;
unsigned int retval;
- if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur))
+ if (!work_on_cpu(cpu, read_measured_perf_ctrs, &readin))
return 0;
+ cur.aperf.whole = readin.aperf.whole -
+ per_cpu(drv_data, cpu)->saved_aperf;
+ cur.mperf.whole = readin.mperf.whole -
+ per_cpu(drv_data, cpu)->saved_mperf;
+ per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole;
+ per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole;
+
#ifdef __i386__
/*
* We dont want to do 64 bit divide with 32 bit kernel
* Get an approximate value. Return failure in case we cannot get
* an approximate value.
*/
- if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) {
+ if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
int shift_count;
u32 h;
- h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi);
+ h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
shift_count = fls(h);
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) {
+ if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
int shift_count = 7;
- cur.aperf_cur.split.lo >>= shift_count;
- cur.mperf_cur.split.lo >>= shift_count;
+ cur.aperf.split.lo >>= shift_count;
+ cur.mperf.split.lo >>= shift_count;
}
- if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo)
- perf_percent = (cur.aperf_cur.split.lo * 100) /
- cur.mperf_cur.split.lo;
+ if (cur.aperf.split.lo && cur.mperf.split.lo)
+ perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
else
perf_percent = 0;
#else
- if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) {
+ if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
int shift_count = 7;
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (cur.aperf_cur.whole && cur.mperf_cur.whole)
- perf_percent = (cur.aperf_cur.whole * 100) /
- cur.mperf_cur.whole;
+ if (cur.aperf.whole && cur.mperf.whole)
+ perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
else
perf_percent = 0;
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 0bd48e65a0ca..ce2ed3e4aad9 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -33,7 +33,6 @@
#include <linux/timex.h>
#include <linux/io.h>
#include <linux/acpi.h>
-#include <linux/kernel.h>
#include <asm/msr.h>
#include <acpi/processor.h>
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 61df77532120..70a10ca100f6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -20,7 +20,6 @@
#include <asm/cacheflush.h>
#include <asm/ftrace.h>
-#include <linux/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>
diff --git a/block/blk-core.c b/block/blk-core.c
index 43fdedc524ee..07ab75403e1a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -131,6 +131,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
rq->cmd = rq->__cmd;
+ rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1;
rq->ref_count = 1;
}
diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index bd3c937b0ac0..7737afb157c3 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -90,7 +90,6 @@ static const struct acpi_port_info acpi_protected_ports[] = {
{"PIT2", 0x0048, 0x004B, ACPI_OSI_WIN_XP},
{"RTC", 0x0070, 0x0071, ACPI_OSI_WIN_XP},
{"CMOS", 0x0074, 0x0076, ACPI_OSI_WIN_XP},
- {"DMA1", 0x0081, 0x0083, ACPI_OSI_WIN_XP},
{"DMA1L", 0x0087, 0x0087, ACPI_OSI_WIN_XP},
{"DMA2", 0x0089, 0x008B, ACPI_OSI_WIN_XP},
{"DMA2L", 0x008F, 0x008F, ACPI_OSI_WIN_XP},
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b0de6312919a..3c7d8942f23b 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -903,7 +903,7 @@ static struct acpi_driver acpi_battery_driver = {
},
};
-static void __init acpi_battery_init_async(void *unused, async_cookie_t cookie)
+static void acpi_battery_init_async(void *unused, async_cookie_t cookie)
{
if (acpi_disabled)
return;
diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c
index 05dfdc96802e..d0d550d22a6d 100644
--- a/drivers/acpi/proc.c
+++ b/drivers/acpi/proc.c
@@ -343,9 +343,6 @@ acpi_system_write_alarm(struct file *file,
}
#endif /* HAVE_ACPI_LEGACY_ALARM */
-extern struct list_head acpi_wakeup_device_list;
-extern spinlock_t acpi_device_lock;
-
static int
acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
{
@@ -353,7 +350,7 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
seq_printf(seq, "Device\tS-state\t Status Sysfs node\n");
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
@@ -361,7 +358,6 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
if (!dev->wakeup.flags.valid)
continue;
- spin_unlock(&acpi_device_lock);
ldev = acpi_get_physical_device(dev->handle);
seq_printf(seq, "%s\t S%d\t%c%-8s ",
@@ -376,9 +372,8 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset)
seq_printf(seq, "\n");
put_device(ldev);
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return 0;
}
@@ -409,7 +404,7 @@ acpi_system_write_wakeup_device(struct file *file,
strbuf[len] = '\0';
sscanf(strbuf, "%s", str);
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
@@ -446,7 +441,7 @@ acpi_system_write_wakeup_device(struct file *file,
}
}
}
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return count;
}
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 4e6e758bd397..6fe121434ffb 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -145,6 +145,9 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr,
struct acpi_processor_power *pwr = &pr->power;
u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;
+ if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
+ return;
+
/*
* Check, if one of the previous states already marked the lapic
* unstable
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 20c23c049207..8ff510b91d88 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -24,7 +24,7 @@ extern struct acpi_device *acpi_root;
static LIST_HEAD(acpi_device_list);
static LIST_HEAD(acpi_bus_id_list);
-DEFINE_SPINLOCK(acpi_device_lock);
+DEFINE_MUTEX(acpi_device_lock);
LIST_HEAD(acpi_wakeup_device_list);
struct acpi_device_bus_id{
@@ -491,7 +491,6 @@ static int acpi_device_register(struct acpi_device *device,
*/
INIT_LIST_HEAD(&device->children);
INIT_LIST_HEAD(&device->node);
- INIT_LIST_HEAD(&device->g_list);
INIT_LIST_HEAD(&device->wakeup_list);
new_bus_id = kzalloc(sizeof(struct acpi_device_bus_id), GFP_KERNEL);
@@ -500,7 +499,7 @@ static int acpi_device_register(struct acpi_device *device,
return -ENOMEM;
}
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
/*
* Find suitable bus_id and instance number in acpi_bus_id_list
* If failed, create one and link it into acpi_bus_id_list
@@ -521,14 +520,12 @@ static int acpi_device_register(struct acpi_device *device,
}
dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, acpi_device_bus_id->instance_no);
- if (device->parent) {
+ if (device->parent)
list_add_tail(&device->node, &device->parent->children);
- list_add_tail(&device->g_list, &device->parent->g_list);
- } else
- list_add_tail(&device->g_list, &acpi_device_list);
+
if (device->wakeup.flags.valid)
list_add_tail(&device->wakeup_list, &acpi_wakeup_device_list);
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
if (device->parent)
device->dev.parent = &parent->dev;
@@ -549,28 +546,22 @@ static int acpi_device_register(struct acpi_device *device,
device->removal_type = ACPI_BUS_REMOVAL_NORMAL;
return 0;
end:
- spin_lock(&acpi_device_lock);
- if (device->parent) {
+ mutex_lock(&acpi_device_lock);
+ if (device->parent)
list_del(&device->node);
- list_del(&device->g_list);
- } else
- list_del(&device->g_list);
list_del(&device->wakeup_list);
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return result;
}
static void acpi_device_unregister(struct acpi_device *device, int type)
{
- spin_lock(&acpi_device_lock);
- if (device->parent) {
+ mutex_lock(&acpi_device_lock);
+ if (device->parent)
list_del(&device->node);
- list_del(&device->g_list);
- } else
- list_del(&device->g_list);
list_del(&device->wakeup_list);
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
acpi_detach_data(device->handle, acpi_bus_data_handler);
diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h
index cfaf8f5b0a14..8a8f3b3382a6 100644
--- a/drivers/acpi/sleep.h
+++ b/drivers/acpi/sleep.h
@@ -5,3 +5,6 @@ extern int acpi_suspend (u32 state);
extern void acpi_enable_wakeup_device_prep(u8 sleep_state);
extern void acpi_enable_wakeup_device(u8 sleep_state);
extern void acpi_disable_wakeup_device(u8 sleep_state);
+
+extern struct list_head acpi_wakeup_device_list;
+extern struct mutex acpi_device_lock;
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 0914eaa9a097..9cd15e8c8932 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -194,6 +194,7 @@ struct acpi_thermal {
struct acpi_handle_list devices;
struct thermal_zone_device *thermal_zone;
int tz_enabled;
+ int kelvin_offset;
struct mutex lock;
};
@@ -583,7 +584,7 @@ static void acpi_thermal_check(void *data)
}
/* sys I/F for generic thermal sysfs support */
-#define KELVIN_TO_MILLICELSIUS(t) (t * 100 - 273200)
+#define KELVIN_TO_MILLICELSIUS(t, off) (((t) - (off)) * 100)
static int thermal_get_temp(struct thermal_zone_device *thermal,
unsigned long *temp)
@@ -598,7 +599,7 @@ static int thermal_get_temp(struct thermal_zone_device *thermal,
if (result)
return result;
- *temp = KELVIN_TO_MILLICELSIUS(tz->temperature);
+ *temp = KELVIN_TO_MILLICELSIUS(tz->temperature, tz->kelvin_offset);
return 0;
}
@@ -704,7 +705,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
if (tz->trips.critical.flags.valid) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.critical.temperature);
+ tz->trips.critical.temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
@@ -713,7 +715,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
if (tz->trips.hot.flags.valid) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.hot.temperature);
+ tz->trips.hot.temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
@@ -722,7 +725,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
if (tz->trips.passive.flags.valid) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.passive.temperature);
+ tz->trips.passive.temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
@@ -732,7 +736,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
tz->trips.active[i].flags.valid; i++) {
if (!trip) {
*temp = KELVIN_TO_MILLICELSIUS(
- tz->trips.active[i].temperature);
+ tz->trips.active[i].temperature,
+ tz->kelvin_offset);
return 0;
}
trip--;
@@ -747,7 +752,8 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
if (tz->trips.critical.flags.valid) {
*temperature = KELVIN_TO_MILLICELSIUS(
- tz->trips.critical.temperature);
+ tz->trips.critical.temperature,
+ tz->kelvin_offset);
return 0;
} else
return -EINVAL;
@@ -1331,6 +1337,25 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz)
return 0;
}
+/*
+ * The exact offset between Kelvin and degree Celsius is 273.15. However ACPI
+ * handles temperature values with a single decimal place. As a consequence,
+ * some implementations use an offset of 273.1 and others use an offset of
+ * 273.2. Try to find out which one is being used, to present the most
+ * accurate and visually appealing number.
+ *
+ * The heuristic below should work for all ACPI thermal zones which have a
+ * critical trip point with a value being a multiple of 0.5 degree Celsius.
+ */
+static void acpi_thermal_guess_offset(struct acpi_thermal *tz)
+{
+ if (tz->trips.critical.flags.valid &&
+ (tz->trips.critical.temperature % 5) == 1)
+ tz->kelvin_offset = 2731;
+ else
+ tz->kelvin_offset = 2732;
+}
+
static int acpi_thermal_add(struct acpi_device *device)
{
int result = 0;
@@ -1356,6 +1381,8 @@ static int acpi_thermal_add(struct acpi_device *device)
if (result)
goto free_memory;
+ acpi_thermal_guess_offset(tz);
+
result = acpi_thermal_register_thermal_zone(tz);
if (result)
goto free_memory;
diff --git a/drivers/acpi/wakeup.c b/drivers/acpi/wakeup.c
index 5aee8c26cc9f..88725dcdf8bc 100644
--- a/drivers/acpi/wakeup.c
+++ b/drivers/acpi/wakeup.c
@@ -12,12 +12,14 @@
#include "internal.h"
#include "sleep.h"
+/*
+ * We didn't lock acpi_device_lock in the file, because it invokes oops in
+ * suspend/resume and isn't really required as this is called in S-state. At
+ * that time, there is no device hotplug
+ **/
#define _COMPONENT ACPI_SYSTEM_COMPONENT
ACPI_MODULE_NAME("wakeup_devices")
-extern struct list_head acpi_wakeup_device_list;
-extern spinlock_t acpi_device_lock;
-
/**
* acpi_enable_wakeup_device_prep - prepare wakeup devices
* @sleep_state: ACPI state
@@ -29,7 +31,6 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state)
{
struct list_head *node, *next;
- spin_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev = container_of(node,
struct acpi_device,
@@ -40,11 +41,8 @@ void acpi_enable_wakeup_device_prep(u8 sleep_state)
(sleep_state > (u32) dev->wakeup.sleep_state))
continue;
- spin_unlock(&acpi_device_lock);
acpi_enable_wakeup_device_power(dev, sleep_state);
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
}
/**
@@ -60,7 +58,6 @@ void acpi_enable_wakeup_device(u8 sleep_state)
* Caution: this routine must be invoked when interrupt is disabled
* Refer ACPI2.0: P212
*/
- spin_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
@@ -74,22 +71,17 @@ void acpi_enable_wakeup_device(u8 sleep_state)
if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
|| sleep_state > (u32) dev->wakeup.sleep_state) {
if (dev->wakeup.flags.run_wake) {
- spin_unlock(&acpi_device_lock);
/* set_gpe_type will disable GPE, leave it like that */
acpi_set_gpe_type(dev->wakeup.gpe_device,
dev->wakeup.gpe_number,
ACPI_GPE_TYPE_RUNTIME);
- spin_lock(&acpi_device_lock);
}
continue;
}
- spin_unlock(&acpi_device_lock);
if (!dev->wakeup.flags.run_wake)
acpi_enable_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number);
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
}
/**
@@ -101,7 +93,6 @@ void acpi_disable_wakeup_device(u8 sleep_state)
{
struct list_head *node, *next;
- spin_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev =
container_of(node, struct acpi_device, wakeup_list);
@@ -112,19 +103,16 @@ void acpi_disable_wakeup_device(u8 sleep_state)
if ((!dev->wakeup.state.enabled && !dev->wakeup.flags.prepared)
|| sleep_state > (u32) dev->wakeup.sleep_state) {
if (dev->wakeup.flags.run_wake) {
- spin_unlock(&acpi_device_lock);
acpi_set_gpe_type(dev->wakeup.gpe_device,
dev->wakeup.gpe_number,
ACPI_GPE_TYPE_WAKE_RUN);
/* Re-enable it, since set_gpe_type will disable it */
acpi_enable_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number);
- spin_lock(&acpi_device_lock);
}
continue;
}
- spin_unlock(&acpi_device_lock);
acpi_disable_wakeup_device_power(dev);
/* Never disable run-wake GPE */
if (!dev->wakeup.flags.run_wake) {
@@ -133,16 +121,14 @@ void acpi_disable_wakeup_device(u8 sleep_state)
acpi_clear_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number, ACPI_NOT_ISR);
}
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
}
int __init acpi_wakeup_device_init(void)
{
struct list_head *node, *next;
- spin_lock(&acpi_device_lock);
+ mutex_lock(&acpi_device_lock);
list_for_each_safe(node, next, &acpi_wakeup_device_list) {
struct acpi_device *dev = container_of(node,
struct acpi_device,
@@ -150,15 +136,13 @@ int __init acpi_wakeup_device_init(void)
/* In case user doesn't load button driver */
if (!dev->wakeup.flags.run_wake || dev->wakeup.state.enabled)
continue;
- spin_unlock(&acpi_device_lock);
acpi_set_gpe_type(dev->wakeup.gpe_device,
dev->wakeup.gpe_number,
ACPI_GPE_TYPE_WAKE_RUN);
acpi_enable_gpe(dev->wakeup.gpe_device,
dev->wakeup.gpe_number);
dev->wakeup.state.enabled = 1;
- spin_lock(&acpi_device_lock);
}
- spin_unlock(&acpi_device_lock);
+ mutex_unlock(&acpi_device_lock);
return 0;
}
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index 1a11de0d3e6d..fe7cf0188acc 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -273,7 +273,7 @@ static int acpi_pcc_retrieve_biosdata(struct pcc_acpi *pcc, u32 *sinf)
union acpi_object *hkey = NULL;
int i;
- status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, 0,
+ status = acpi_evaluate_object(pcc->handle, METHOD_HKEY_SINF, NULL,
&buffer);
if (ACPI_FAILURE(status)) {
ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3523b895eb4b..5a97bcfe03e5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -516,8 +516,6 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
goto out_unlock;
ret = nfs_updatepage(filp, page, 0, pagelen);
- if (ret == 0)
- ret = pagelen;
out_unlock:
unlock_page(page);
if (ret)
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index a2228511d4be..c34b11022908 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -270,7 +270,6 @@ struct acpi_device {
struct list_head children;
struct list_head node;
struct list_head wakeup_list;
- struct list_head g_list;
struct acpi_device_status status;
struct acpi_device_flags flags;
struct acpi_device_pnp pnp;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index cebfdcd3dbdd..37bcb50a4d7c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -76,7 +76,8 @@ struct ftrace_branch_data {
* Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
* to disable branch tracing on a per file basis.
*/
-#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
+#if defined(CONFIG_TRACE_BRANCH_PROFILING) \
+ && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__)
void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
#define likely_notrace(x) __builtin_expect(!!(x), 1)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 015a3d22cf74..da5405dce347 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -356,6 +356,9 @@ struct ftrace_graph_ret {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* for init task */
+#define INIT_FTRACE_GRAPH .ret_stack = NULL
+
/*
* Stack of return addresses for functions
* of a thread.
@@ -430,10 +433,11 @@ static inline void unpause_graph_tracing(void)
{
atomic_dec(&current->tracing_graph_pause);
}
-#else
+#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
#define __notrace_funcgraph
#define __irq_entry
+#define INIT_FTRACE_GRAPH
static inline void ftrace_graph_init_task(struct task_struct *t) { }
static inline void ftrace_graph_exit_task(struct task_struct *t) { }
@@ -445,7 +449,7 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
static inline void pause_graph_tracing(void) { }
static inline void unpause_graph_tracing(void) { }
-#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_TRACING
#include <linux/sched.h>
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index faa1cf848bcd..45257475623c 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -116,7 +116,7 @@
# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
#endif
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
extern void synchronize_irq(unsigned int irq);
#else
# define synchronize_irq(irq) barrier()
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index af1de95e711e..dcfb93337e9a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -5,6 +5,7 @@
#include <linux/irqflags.h>
#include <linux/utsname.h>
#include <linux/lockdep.h>
+#include <linux/ftrace.h>
#include <linux/ipc.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
@@ -185,6 +186,7 @@ extern struct cred init_cred;
INIT_IDS \
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
+ INIT_FTRACE_GRAPH \
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 8a9613d0c674..91bb76f44f14 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -59,6 +59,18 @@
#define IRQF_NOBALANCING 0x00000800
#define IRQF_IRQPOLL 0x00001000
+/*
+ * Bits used by threaded handlers:
+ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
+ * IRQTF_DIED - handler thread died
+ * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
+ */
+enum {
+ IRQTF_RUNTHREAD,
+ IRQTF_DIED,
+ IRQTF_WARNED,
+};
+
typedef irqreturn_t (*irq_handler_t)(int, void *);
/**
@@ -71,6 +83,9 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
* @next: pointer to the next irqaction for shared interrupts
* @irq: interrupt number
* @dir: pointer to the proc/irq/NN/name entry
+ * @thread_fn: interupt handler function for threaded interrupts
+ * @thread: thread pointer for threaded interrupts
+ * @thread_flags: flags related to @thread
*/
struct irqaction {
irq_handler_t handler;
@@ -81,18 +96,68 @@ struct irqaction {
struct irqaction *next;
int irq;
struct proc_dir_entry *dir;
+ irq_handler_t thread_fn;
+ struct task_struct *thread;
+ unsigned long thread_flags;
};
extern irqreturn_t no_action(int cpl, void *dev_id);
-extern int __must_check request_irq(unsigned int, irq_handler_t handler,
- unsigned long, const char *, void *);
+
+#ifdef CONFIG_GENERIC_HARDIRQS
+extern int __must_check
+request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ irq_handler_t thread_fn,
+ unsigned long flags, const char *name, void *dev);
+
+static inline int __must_check
+request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+ const char *name, void *dev)
+{
+ return request_threaded_irq(irq, handler, NULL, flags, name, dev);
+}
+
+extern void exit_irq_thread(void);
+#else
+
+extern int __must_check
+request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+ const char *name, void *dev);
+
+/*
+ * Special function to avoid ifdeffery in kernel/irq/devres.c which
+ * gets magically built by GENERIC_HARDIRQS=n architectures (sparc,
+ * m68k). I really love these $@%#!* obvious Makefile references:
+ * ../../../kernel/irq/devres.o
+ */
+static inline int __must_check
+request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ irq_handler_t thread_fn,
+ unsigned long flags, const char *name, void *dev)
+{
+ return request_irq(irq, handler, flags, name, dev);
+}
+
+static inline void exit_irq_thread(void) { }
+#endif
+
extern void free_irq(unsigned int, void *);
struct device;
-extern int __must_check devm_request_irq(struct device *dev, unsigned int irq,
- irq_handler_t handler, unsigned long irqflags,
- const char *devname, void *dev_id);
+extern int __must_check
+devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ unsigned long irqflags, const char *devname,
+ void *dev_id);
+
+static inline int __must_check
+devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler,
+ unsigned long irqflags, const char *devname, void *dev_id)
+{
+ return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags,
+ devname, dev_id);
+}
+
extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
/*
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 974890b3c52f..ca507c9426b0 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -22,6 +22,7 @@
#include <linux/irqnr.h>
#include <linux/errno.h>
#include <linux/topology.h>
+#include <linux/wait.h>
#include <asm/irq.h>
#include <asm/ptrace.h>
@@ -158,6 +159,8 @@ struct irq_2_iommu;
* @affinity: IRQ affinity on SMP
* @cpu: cpu index useful for balancing
* @pending_mask: pending rebalanced interrupts
+ * @threads_active: number of irqaction threads currently running
+ * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
* @dir: /proc/irq/ procfs entry
* @name: flow handler name for /proc/interrupts output
*/
@@ -189,6 +192,8 @@ struct irq_desc {
cpumask_var_t pending_mask;
#endif
#endif
+ atomic_t threads_active;
+ wait_queue_head_t wait_for_threads;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *dir;
#endif
diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h
index c5584ca5b8c9..819acaaac3f5 100644
--- a/include/linux/irqreturn.h
+++ b/include/linux/irqreturn.h
@@ -5,10 +5,12 @@
* enum irqreturn
* @IRQ_NONE interrupt was not from this device
* @IRQ_HANDLED interrupt was handled by this device
+ * @IRQ_WAKE_THREAD handler requests to wake the handler thread
*/
enum irqreturn {
IRQ_NONE,
IRQ_HANDLED,
+ IRQ_WAKE_THREAD,
};
typedef enum irqreturn irqreturn_t;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b94f3541f67b..98e1fe51601d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -300,17 +300,11 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
extern int softlockup_thresh;
#else
static inline void softlockup_tick(void)
{
}
-static inline void spawn_softlockup_task(void)
-{
-}
static inline void touch_softlockup_watchdog(void)
{
}
@@ -319,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void)
}
#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos);
+#endif
/* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text")))
@@ -1255,9 +1258,8 @@ struct task_struct {
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
- unsigned long last_switch_timestamp;
unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
@@ -1294,6 +1296,11 @@ struct task_struct {
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
spinlock_t alloc_lock;
+#ifdef CONFIG_GENERIC_HARDIRQS
+ /* IRQ handler threads */
+ struct irqaction *irqaction;
+#endif
+
/* Protection of the PI data structures: */
spinlock_t pi_lock;
diff --git a/kernel/Makefile b/kernel/Makefile
index bab1dffe37e9..42423665660a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 32cbf2607cb0..abf9cf3b95c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code)
schedule();
}
+ exit_irq_thread();
+
exit_signals(tsk); /* sets PF_EXITING */
/*
* tsk->flags are checked in the futex code to protect against
diff --git a/kernel/fork.c b/kernel/fork.c
index 660c2b8765bc..989c7c202b3d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
tsk->min_flt = tsk->maj_flt = 0;
tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+ tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
tsk->mm = NULL;
tsk->active_mm = NULL;
@@ -1032,11 +1035,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->default_timer_slack_ns = current->timer_slack_ns;
-#ifdef CONFIG_DETECT_SOFTLOCKUP
- p->last_switch_count = 0;
- p->last_switch_timestamp = 0;
-#endif
-
task_io_accounting_init(&p->ioac);
acct_clear_integrals(p);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+/*
+ * The number of tasks checked:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+
+/*
+ * Limit number of tasks checked in a batch.
+ *
+ * This value controls the preemptibility of khungtaskd since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period. So it needs to be upper-bound.
+ */
+#define HUNG_TASK_BATCHING 1024
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+
+unsigned long __read_mostly sysctl_hung_task_warnings = 10;
+
+static int __read_mostly did_panic;
+
+static struct task_struct *watchdog_task;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+ CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+
+static int __init hung_task_panic_setup(char *str)
+{
+ sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
+
+ return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+
+static int
+hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ did_panic = 1;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+ .notifier_call = hung_task_panic,
+};
+
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+ unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+ /*
+ * Ensure the task is not frozen.
+ * Also, when a freshly created task is scheduled once, changes
+ * its state to TASK_UNINTERRUPTIBLE without having ever been
+ * switched out once, it musn't be checked.
+ */
+ if (unlikely(t->flags & PF_FROZEN || !switch_count))
+ return;
+
+ if (switch_count != t->last_switch_count) {
+ t->last_switch_count = switch_count;
+ return;
+ }
+ if (!sysctl_hung_task_warnings)
+ return;
+ sysctl_hung_task_warnings--;
+
+ /*
+ * Ok, the task did not get scheduled for more than 2 minutes,
+ * complain:
+ */
+ printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+ "%ld seconds.\n", t->comm, t->pid, timeout);
+ printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+ " disables this message.\n");
+ sched_show_task(t);
+ __debug_show_held_locks(t);
+
+ touch_nmi_watchdog();
+
+ if (sysctl_hung_task_panic)
+ panic("hung_task: blocked tasks");
+}
+
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * exit the grace period. For classic RCU, a reschedule is required.
+ */
+static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+ get_task_struct(g);
+ get_task_struct(t);
+ rcu_read_unlock();
+ cond_resched();
+ rcu_read_lock();
+ put_task_struct(t);
+ put_task_struct(g);
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
+ * a really long time (120 seconds). If that happens, print out
+ * a warning.
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+ int max_count = sysctl_hung_task_check_count;
+ int batch_count = HUNG_TASK_BATCHING;
+ struct task_struct *g, *t;
+
+ /*
+ * If the system crashed already then all bets are off,
+ * do not report extra hung tasks:
+ */
+ if (test_taint(TAINT_DIE) || did_panic)
+ return;
+
+ rcu_read_lock();
+ do_each_thread(g, t) {
+ if (!--max_count)
+ goto unlock;
+ if (!--batch_count) {
+ batch_count = HUNG_TASK_BATCHING;
+ rcu_lock_break(g, t);
+ /* Exit if t or g was unhashed during refresh. */
+ if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+ goto unlock;
+ }
+ /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+ if (t->state == TASK_UNINTERRUPTIBLE)
+ check_hung_task(t, timeout);
+ } while_each_thread(g, t);
+ unlock:
+ rcu_read_unlock();
+}
+
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+ /* timeout of 0 will disable the watchdog */
+ return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+}
+
+/*
+ * Process updating of timeout sysctl
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+ struct file *filp, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+ if (ret || !write)
+ goto out;
+
+ wake_up_process(watchdog_task);
+
+ out:
+ return ret;
+}
+
+/*
+ * kthread which checks for tasks stuck in D state
+ */
+static int watchdog(void *dummy)
+{
+ set_user_nice(current, 0);
+
+ for ( ; ; ) {
+ unsigned long timeout = sysctl_hung_task_timeout_secs;
+
+ while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+ timeout = sysctl_hung_task_timeout_secs;
+
+ check_hung_uninterruptible_tasks(timeout);
+ }
+
+ return 0;
+}
+
+static int __init hung_task_init(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+ watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+
+ return 0;
+}
+
+module_init(hung_task_init);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 38a25b8d8bff..d06df9c41cba 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
}
/**
- * devm_request_irq - allocate an interrupt line for a managed device
+ * devm_request_threaded_irq - allocate an interrupt line for a managed device
* @dev: device to request interrupt for
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
+ * @thread_fn: function to be called in a threaded interrupt context. NULL
+ * for devices which handle everything in @handler
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
* If an IRQ allocated with this function needs to be freed
* separately, dev_free_irq() must be used.
*/
-int devm_request_irq(struct device *dev, unsigned int irq,
- irq_handler_t handler, unsigned long irqflags,
- const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ irq_handler_t handler, irq_handler_t thread_fn,
+ unsigned long irqflags, const char *devname,
+ void *dev_id)
{
struct irq_devres *dr;
int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
if (!dr)
return -ENOMEM;
- rc = request_irq(irq, handler, irqflags, devname, dev_id);
+ rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
+ dev_id);
if (rc) {
devres_free(dr);
return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
return 0;
}
-EXPORT_SYMBOL(devm_request_irq);
+EXPORT_SYMBOL(devm_request_threaded_irq);
/**
* devm_free_irq - free an interrupt
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 343acecae629..d82142be8dd2 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
return IRQ_NONE;
}
+static void warn_no_thread(unsigned int irq, struct irqaction *action)
+{
+ if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+ return;
+
+ printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
+ "but no thread function available.", irq, action->name);
+}
+
DEFINE_TRACE(irq_handler_entry);
DEFINE_TRACE(irq_handler_exit);
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, ret);
- if (ret == IRQ_HANDLED)
+
+ switch (ret) {
+ case IRQ_WAKE_THREAD:
+ /*
+ * Set result to handled so the spurious check
+ * does not trigger.
+ */
+ ret = IRQ_HANDLED;
+
+ /*
+ * Catch drivers which return WAKE_THREAD but
+ * did not set up a thread function
+ */
+ if (unlikely(!action->thread_fn)) {
+ warn_no_thread(irq, action);
+ break;
+ }
+
+ /*
+ * Wake up the handler thread for this
+ * action. In case the thread crashed and was
+ * killed we just pretend that we handled the
+ * interrupt. The hardirq handler above has
+ * disabled the device interrupt, so no irq
+ * storm is lurking.
+ */
+ if (likely(!test_bit(IRQTF_DIED,
+ &action->thread_flags))) {
+ set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+ wake_up_process(action->thread);
+ }
+
+ /* Fall through to add to randomness */
+ case IRQ_HANDLED:
status |= action->flags;
+ break;
+
+ default:
+ break;
+ }
+
retval |= ret;
action = action->next;
} while (action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1516ab77355c..7e2e7dd4cd2f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -8,16 +8,15 @@
*/
#include <linux/irq.h>
+#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include "internals.h"
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
-cpumask_var_t irq_default_affinity;
-
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
/* Oops, that failed? */
} while (status & IRQ_INPROGRESS);
+
+ /*
+ * We made sure that no hardirq handler is running. Now verify
+ * that no threaded handlers are active.
+ */
+ wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
}
EXPORT_SYMBOL(synchronize_irq);
+#ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
+
/**
* irq_can_set_affinity - Check if the affinity of a given irq can be set
* @irq: Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
return 1;
}
+static void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+{
+ struct irqaction *action = desc->action;
+
+ while (action) {
+ if (action->thread)
+ set_cpus_allowed_ptr(action->thread, cpumask);
+ action = action->next;
+ }
+}
+
/**
* irq_set_affinity - Set the irq affinity of a given irq
* @irq: Interrupt to set affinity
@@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
#endif
+ irq_set_thread_affinity(desc, cpumask);
desc->status |= IRQ_AFFINITY_SET;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
@@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq)
spin_lock_irqsave(&desc->lock, flags);
ret = setup_affinity(irq, desc);
+ if (!ret)
+ irq_set_thread_affinity(desc, desc->affinity);
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
@@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
return ret;
}
+static int irq_wait_for_interrupt(struct irqaction *action)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (test_and_clear_bit(IRQTF_RUNTHREAD,
+ &action->thread_flags)) {
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+ schedule();
+ }
+ return -1;
+}
+
+/*
+ * Interrupt handler thread
+ */
+static int irq_thread(void *data)
+{
+ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+ struct irqaction *action = data;
+ struct irq_desc *desc = irq_to_desc(action->irq);
+ int wake;
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+ current->irqaction = action;
+
+ while (!irq_wait_for_interrupt(action)) {
+
+ atomic_inc(&desc->threads_active);
+
+ spin_lock_irq(&desc->lock);
+ if (unlikely(desc->status & IRQ_DISABLED)) {
+ /*
+ * CHECKME: We might need a dedicated
+ * IRQ_THREAD_PENDING flag here, which
+ * retriggers the thread in check_irq_resend()
+ * but AFAICT IRQ_PENDING should be fine as it
+ * retriggers the interrupt itself --- tglx
+ */
+ desc->status |= IRQ_PENDING;
+ spin_unlock_irq(&desc->lock);
+ } else {
+ spin_unlock_irq(&desc->lock);
+
+ action->thread_fn(action->irq, action->dev_id);
+ }
+
+ wake = atomic_dec_and_test(&desc->threads_active);
+
+ if (wake && waitqueue_active(&desc->wait_for_threads))
+ wake_up(&desc->wait_for_threads);
+ }
+
+ /*
+ * Clear irqaction. Otherwise exit_irq_thread() would make
+ * fuzz about an active irq thread going into nirvana.
+ */
+ current->irqaction = NULL;
+ return 0;
+}
+
+/*
+ * Called from do_exit()
+ */
+void exit_irq_thread(void)
+{
+ struct task_struct *tsk = current;
+
+ if (!tsk->irqaction)
+ return;
+
+ printk(KERN_ERR
+ "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+ tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+
+ /*
+ * Set the THREAD DIED flag to prevent further wakeups of the
+ * soon to be gone threaded handler.
+ */
+ set_bit(IRQTF_DIED, &tsk->irqaction->flags);
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
@@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
/*
+ * Threaded handler ?
+ */
+ if (new->thread_fn) {
+ struct task_struct *t;
+
+ t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+ new->name);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+ /*
+ * We keep the reference to the task struct even if
+ * the thread dies to avoid that the interrupt code
+ * references an already freed task_struct.
+ */
+ get_task_struct(t);
+ new->thread = t;
+ wake_up_process(t);
+ }
+
+ /*
* The following block of code has to be executed atomically
*/
spin_lock_irqsave(&desc->lock, flags);
@@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (!shared) {
irq_chip_set_defaults(desc->chip);
+ init_waitqueue_head(&desc->wait_for_threads);
+
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc, irq,
new->flags & IRQF_TRIGGER_MASK);
- if (ret) {
- spin_unlock_irqrestore(&desc->lock, flags);
- return ret;
- }
+ if (ret)
+ goto out_thread;
} else
compat_irq_chip_set_default_handler(desc);
#if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +676,19 @@ mismatch:
dump_stack();
}
#endif
+ ret = -EBUSY;
+
+out_thread:
spin_unlock_irqrestore(&desc->lock, flags);
- return -EBUSY;
+ if (new->thread) {
+ struct task_struct *t = new->thread;
+
+ new->thread = NULL;
+ if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+ kthread_stop(t);
+ put_task_struct(t);
+ }
+ return ret;
}
/**
@@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
+ struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
else
desc->chip->disable(irq);
}
+
+ irqthread = action->thread;
+ action->thread = NULL;
+
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
+ if (irqthread) {
+ if (!test_bit(IRQTF_DIED, &action->thread_flags))
+ kthread_stop(irqthread);
+ put_task_struct(irqthread);
+ }
+
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id)
EXPORT_SYMBOL(free_irq);
/**
- * request_irq - allocate an interrupt line
+ * request_threaded_irq - allocate an interrupt line
* @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
+ * @handler: Function to be called when the IRQ occurs.
+ * Primary handler for threaded interrupts
+ * @thread_fn: Function called from the irq handler thread
+ * If NULL, no irq thread is created
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device
* @dev_id: A cookie passed back to the handler function
@@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq);
* raises, you must take care both to initialise your hardware
* and to set up the interrupt handler in the right order.
*
+ * If you want to set up a threaded irq handler for your device
+ * then you need to supply @handler and @thread_fn. @handler ist
+ * still called in hard interrupt context and has to check
+ * whether the interrupt originates from the device. If yes it
+ * needs to disable the interrupt on the device and return
+ * IRQ_THREAD_WAKE which will wake up the handler thread and run
+ * @thread_fn. This split handler design is necessary to support
+ * shared interrupts.
+ *
* Dev_id must be globally unique. Normally the address of the
* device data structure is used as the cookie. Since the handler
* receives this value it makes sense to use it.
@@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq);
* IRQF_TRIGGER_* Specify active edge(s) or level
*
*/
-int request_irq(unsigned int irq, irq_handler_t handler,
- unsigned long irqflags, const char *devname, void *dev_id)
+int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ irq_handler_t thread_fn, unsigned long irqflags,
+ const char *devname, void *dev_id)
{
struct irqaction *action;
struct irq_desc *desc;
@@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
return -ENOMEM;
action->handler = handler;
+ action->thread_fn = thread_fn;
action->flags = irqflags;
action->name = devname;
action->dev_id = dev_id;
@@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
#endif
return retval;
}
-EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(request_threaded_irq);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
}
/*
- * Have a reasonable limit on the number of tasks checked:
- */
-unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
-
-/*
- * Zero means infinite timeout - no checking done:
- */
-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
-
-unsigned long __read_mostly sysctl_hung_task_warnings = 10;
-
-/*
- * Only do the hung-tasks check on one CPU:
- */
-static int check_cpu __read_mostly = -1;
-
-static void check_hung_task(struct task_struct *t, unsigned long now)
-{
- unsigned long switch_count = t->nvcsw + t->nivcsw;
-
- if (t->flags & PF_FROZEN)
- return;
-
- if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
- t->last_switch_count = switch_count;
- t->last_switch_timestamp = now;
- return;
- }
- if ((long)(now - t->last_switch_timestamp) <
- sysctl_hung_task_timeout_secs)
- return;
- if (!sysctl_hung_task_warnings)
- return;
- sysctl_hung_task_warnings--;
-
- /*
- * Ok, the task did not get scheduled for more than 2 minutes,
- * complain:
- */
- printk(KERN_ERR "INFO: task %s:%d blocked for more than "
- "%ld seconds.\n", t->comm, t->pid,
- sysctl_hung_task_timeout_secs);
- printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
- " disables this message.\n");
- sched_show_task(t);
- __debug_show_held_locks(t);
-
- t->last_switch_timestamp = now;
- touch_nmi_watchdog();
-
- if (softlockup_panic)
- panic("softlockup: blocked tasks");
-}
-
-/*
- * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
- * a really long time (120 seconds). If that happens, print out
- * a warning.
- */
-static void check_hung_uninterruptible_tasks(int this_cpu)
-{
- int max_count = sysctl_hung_task_check_count;
- unsigned long now = get_timestamp(this_cpu);
- struct task_struct *g, *t;
-
- /*
- * If the system crashed already then all bets are off,
- * do not report extra hung tasks:
- */
- if (test_taint(TAINT_DIE) || did_panic)
- return;
-
- read_lock(&tasklist_lock);
- do_each_thread(g, t) {
- if (!--max_count)
- goto unlock;
- /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
- if (t->state == TASK_UNINTERRUPTIBLE)
- check_hung_task(t, now);
- } while_each_thread(g, t);
- unlock:
- read_unlock(&tasklist_lock);
-}
-
-/*
* The watchdog thread - runs every second and touches the timestamp.
*/
static int watchdog(void *__bind_cpu)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
- int this_cpu = (long)__bind_cpu;
sched_setscheduler(current, SCHED_FIFO, &param);
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
if (kthread_should_stop())
break;
- if (this_cpu == check_cpu) {
- if (sysctl_hung_task_timeout_secs)
- check_hung_uninterruptible_tasks(this_cpu);
- }
-
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- check_cpu = cpumask_any(cpu_online_mask);
wake_up_process(per_cpu(watchdog_task, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- if (hotcpu == check_cpu) {
- /* Pick any other online cpu. */
- check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
- }
- break;
-
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 72eb1a41dcab..4286b62b34a0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -814,6 +814,19 @@ static struct ctl_table kern_table[] = {
.extra1 = &neg_one,
.extra2 = &sixty,
},
+#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hung_task_panic",
+ .data = &sysctl_hung_task_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_check_count",
@@ -829,7 +842,7 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_hung_task_timeout_secs,
.maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_dohung_task_timeout_secs,
.strategy = &sysctl_intvec,
},
{
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 947c5b3f90c4..b32ff446c3fb 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
char *msg;
struct blk_trace *bt;
- if (count > BLK_TN_MAX_MSG)
+ if (count >= BLK_TN_MAX_MSG)
return -EINVAL;
- msg = kmalloc(count, GFP_KERNEL);
+ msg = kmalloc(count + 1, GFP_KERNEL);
if (msg == NULL)
return -ENOMEM;
@@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
return -EFAULT;
}
+ msg[count] = '\0';
bt = filp->private_data;
__trace_note_message(bt, "%s", msg);
kfree(msg);
@@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
if (blk_pc_request(rq)) {
what |= BLK_TC_ACT(BLK_TC_PC);
__blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
- sizeof(rq->cmd), rq->cmd);
+ rq->cmd_len, rq->cmd);
} else {
what |= BLK_TC_ACT(BLK_TC_FS);
__blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0174a40c563..9d28476a9851 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
+#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
@@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str)
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
-long
-ns2usecs(cycle_t nsec)
+unsigned long long ns2usecs(cycle_t nsec)
{
nsec += 500;
do_div(nsec, 1000);
@@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
return;
cpumask_set_cpu(iter->cpu, iter->started);
- trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
+
+ /* Don't print started cpu buffer for the first entry of the trace */
+ if (iter->idx > 1)
+ trace_seq_printf(s, "##### CPU %u buffer started ####\n",
+ iter->cpu);
}
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
if (current_trace)
*iter->trace = *current_trace;
+ if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
+ goto fail;
+
+ cpumask_clear(iter->started);
+
if (current_trace && current_trace->print_max)
iter->tr = &max_tr;
else
@@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
if (iter->buffer_iter[cpu])
ring_buffer_read_finish(iter->buffer_iter[cpu]);
}
+ free_cpumask_var(iter->started);
fail:
mutex_unlock(&trace_types_lock);
kfree(iter->trace);
@@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
seq_release(inode, file);
mutex_destroy(&iter->mutex);
+ free_cpumask_var(iter->started);
kfree(iter->trace);
kfree(iter);
return 0;
@@ -2358,9 +2369,9 @@ static const char readme_msg[] =
"# mkdir /debug\n"
"# mount -t debugfs nodev /debug\n\n"
"# cat /debug/tracing/available_tracers\n"
- "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+ "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
"# cat /debug/tracing/current_tracer\n"
- "none\n"
+ "nop\n"
"# echo sched_switch > /debug/tracing/current_tracer\n"
"# cat /debug/tracing/current_tracer\n"
"sched_switch\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cbc168f1e43d..e685ac2b2ba1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
-extern long ns2usecs(cycle_t nsec);
+extern unsigned long long ns2usecs(cycle_t nsec);
extern int
trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
extern int
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4d9952d3df50..07a22c33ebf3 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -40,7 +40,7 @@
#undef TRACE_FIELD_ZERO_CHAR
#define TRACE_FIELD_ZERO_CHAR(item) \
- ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \
+ ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \
"offset:%u;\tsize:0;\n", \
(unsigned int)offsetof(typeof(field), item)); \
if (!ret) \
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d72b9a63b247..64b54a59c55b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
trace_find_cmdline(entry->pid, comm);
- ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
+ ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
" %ld.%03ldms (+%ld.%03ldms): ", comm,
entry->pid, iter->cpu, entry->flags,
entry->preempt_count, iter->idx,
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index de35f200abd3..9117cea6f1ae 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
pc = preempt_count();
tracing_record_cmdline(current);
+ if (sched_stopped)
+ return;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c5ad6b2ec84..5bc00e8f153e 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
- trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
/*
@@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
data = wakeup_trace->data[wakeup_cpu];
data->preempt_timestamp = ftrace_now(cpu);
tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+
+ /*
+ * We must be careful in using CALLER_ADDR2. But since wake_up
+ * is not called by an assembly function (where as schedule is)
+ * it should be safe to use it here.
+ */
trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9638d99644af..c6e854f215fa 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+config DETECT_HUNG_TASK
+ bool "Detect Hung Tasks"
+ depends on DEBUG_KERNEL
+ default DETECT_SOFTLOCKUP
+ help
+ Say Y here to enable the kernel to detect "hung tasks",
+ which are bugs that cause the task to be stuck in
+ uninterruptible "D" state indefinitiley.
+
+ When a hung task is detected, the kernel will print the
+ current stack trace (which you should report), but the
+ task will stay in uninterruptible state. If lockdep is
+ enabled then all held locks will also be reported. This
+ feature has negligible overhead.
+
+config BOOTPARAM_HUNG_TASK_PANIC
+ bool "Panic (Reboot) On Hung Tasks"
+ depends on DETECT_HUNG_TASK
+ help
+ Say Y here to enable the kernel to panic on "hung tasks",
+ which are bugs that cause the kernel to leave a task stuck
+ in uninterruptible "D" state.
+
+ The panic can be used in combination with panic_timeout,
+ to cause the system to reboot automatically after a
+ hung task has been detected. This feature is useful for
+ high-availability systems that have uptime guarantees and
+ where a hung tasks must be resolved ASAP.
+
+ Say N if unsure.
+
+config BOOTPARAM_HUNG_TASK_PANIC_VALUE
+ int
+ depends on DETECT_HUNG_TASK
+ range 0 1
+ default 0 if !BOOTPARAM_HUNG_TASK_PANIC
+ default 1 if BOOTPARAM_HUNG_TASK_PANIC
+
config SCHED_DEBUG
bool "Collect scheduler debugging info"
depends on DEBUG_KERNEL && PROC_FS
diff --git a/scripts/trace/power.pl b/scripts/tracing/power.pl
index 4f729b3501e0..4f729b3501e0 100644
--- a/scripts/trace/power.pl
+++ b/scripts/tracing/power.pl