From 30e3ce6dcbe3fc29c343b17e768b07d4a795de21 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:52:52 +0200 Subject: PM / Sleep: Change wakeup source statistics to follow Android Wakeup statistics used by Android are slightly different from what we have in wakeup sources at the moment and there aren't any known users of those statistics other than Android, so modify them to make it easier for Android to switch to wakeup sources. This removes the struct wakeup_source's hit_cout field, which is very rough and therefore not very useful, and adds two new fields, wakeup_count and expire_count. The first one tracks how many times the wakeup source is activated with events_check_enabled set (which roughly corresponds to the situations when a system power transition to a sleep state is in progress and would be aborted by this wakeup source if it were the only active one at that time) and the second one is the number of times the wakeup source has been activated with a timeout that expired. Additionally, the last_time field is now updated when the wakeup source is deactivated too (previously it was only updated during the wakeup source's activation), which seems to be what Android does with the analogous counter for wakelocks. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- include/linux/pm_wakeup.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index d9f05113e5fb..5285317a612a 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -33,12 +33,14 @@ * * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. - * @last_time: Monotonic clock when the wakeup source's was activated last time. + * @last_time: Monotonic clock when the wakeup source's was touched last time. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. - * @hit_count: Number of times the wakeup sorce might abort system suspend. + * @expire_count: Number of times the wakeup source's timeout has expired. + * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. + * @has_timeout: The wakeup source has been activated with a timeout. */ struct wakeup_source { const char *name; @@ -52,8 +54,9 @@ struct wakeup_source { unsigned long event_count; unsigned long active_count; unsigned long relax_count; - unsigned long hit_count; - unsigned int active:1; + unsigned long expire_count; + unsigned long wakeup_count; + bool active:1; }; #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3 From 7483b4a4d9abf9dcf1ffe6e805ead2847ec3264e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:22 +0200 Subject: PM / Sleep: Implement opportunistic sleep, v2 Introduce a mechanism by which the kernel can trigger global transitions to a sleep state chosen by user space if there are no active wakeup sources. It consists of a new sysfs attribute, /sys/power/autosleep, that can be written one of the strings returned by reads from /sys/power/state, an ordered workqueue and a work item carrying out the "suspend" operations. If a string representing the system's sleep state is written to /sys/power/autosleep, the work item triggering transitions to that state is queued up and it requeues itself after every execution until user space writes "off" to /sys/power/autosleep. That work item enables the detection of wakeup events using the functions already defined in drivers/base/power/wakeup.c (with one small modification) and calls either pm_suspend(), or hibernate() to put the system into a sleep state. If a wakeup event is reported while the transition is in progress, it will abort the transition and the "system suspend" work item will be queued up again. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: NeilBrown --- Documentation/ABI/testing/sysfs-power | 17 +++++ drivers/base/power/wakeup.c | 34 +++++----- include/linux/suspend.h | 13 +++- kernel/power/Kconfig | 8 +++ kernel/power/Makefile | 1 + kernel/power/autosleep.c | 123 ++++++++++++++++++++++++++++++++++ kernel/power/main.c | 119 ++++++++++++++++++++++++++------ kernel/power/power.h | 18 +++++ 8 files changed, 298 insertions(+), 35 deletions(-) create mode 100644 kernel/power/autosleep.c (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index b464d12761ba..237c735db6c9 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -172,3 +172,20 @@ Description: Reading from this file will display the current value, which is set to 1 MB by default. + +What: /sys/power/autosleep +Date: April 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/power/autosleep file can be written one of the strings + returned by reads from /sys/power/state. If that happens, a + work item attempting to trigger a transition of the system to + the sleep state represented by that string is queued up. This + attempt will only succeed if there are no active wakeup sources + in the system at that time. After every execution, regardless + of whether or not the attempt to put the system to sleep has + succeeded, the work item requeues itself until user space + writes "off" to /sys/power/autosleep. + + Reading from this file causes the last string successfully + written to it to be returned. diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 1132799421cd..cf1706df7610 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -660,29 +660,33 @@ bool pm_wakeup_pending(void) /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. + * @block: Whether or not to block. * - * Store the number of registered wakeup events at the address in @count. Block - * if the current number of wakeup events being processed is nonzero. + * Store the number of registered wakeup events at the address in @count. If + * @block is set, block until the current number of wakeup events being + * processed is zero. * - * Return 'false' if the wait for the number of wakeup events being processed to - * drop down to zero has been interrupted by a signal (and the current number - * of wakeup events being processed is still nonzero). Otherwise return 'true'. + * Return 'false' if the current number of wakeup events being processed is + * nonzero. Otherwise return 'true'. */ -bool pm_get_wakeup_count(unsigned int *count) +bool pm_get_wakeup_count(unsigned int *count, bool block) { unsigned int cnt, inpr; - DEFINE_WAIT(wait); - for (;;) { - prepare_to_wait(&wakeup_count_wait_queue, &wait, - TASK_INTERRUPTIBLE); - split_counters(&cnt, &inpr); - if (inpr == 0 || signal_pending(current)) - break; + if (block) { + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&wakeup_count_wait_queue, &wait, + TASK_INTERRUPTIBLE); + split_counters(&cnt, &inpr); + if (inpr == 0 || signal_pending(current)) + break; - schedule(); + schedule(); + } + finish_wait(&wakeup_count_wait_queue, &wait); } - finish_wait(&wakeup_count_wait_queue, &wait); split_counters(&cnt, &inpr); *count = cnt; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ac1c114c499d..76b7ec7d3a81 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -356,7 +356,7 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); -extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); static inline void lock_system_sleep(void) @@ -407,6 +407,17 @@ static inline void unlock_system_sleep(void) {} #endif /* !CONFIG_PM_SLEEP */ +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +void queue_up_suspend_work(void); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline void queue_up_suspend_work(void) {} + +#endif /* !CONFIG_PM_AUTOSLEEP */ + #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index deb5461e3216..67947083f842 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -103,6 +103,14 @@ config PM_SLEEP_SMP select HOTPLUG select HOTPLUG_CPU +config PM_AUTOSLEEP + bool "Opportunistic sleep" + depends on PM_SLEEP + default n + ---help--- + Allow the kernel to trigger a system transition into a global sleep + state automatically whenever there are no active wakeup sources. + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 66d808ec5252..010b2f7e148c 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -9,5 +9,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o +obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c new file mode 100644 index 000000000000..42348e3589d3 --- /dev/null +++ b/kernel/power/autosleep.c @@ -0,0 +1,123 @@ +/* + * kernel/power/autosleep.c + * + * Opportunistic sleep support. + * + * Copyright (C) 2012 Rafael J. Wysocki + */ + +#include +#include +#include + +#include "power.h" + +static suspend_state_t autosleep_state; +static struct workqueue_struct *autosleep_wq; +/* + * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source + * is active, otherwise a deadlock with try_to_suspend() is possible. + * Alternatively mutex_lock_interruptible() can be used. This will then fail + * if an auto_sleep cycle tries to freeze processes. + */ +static DEFINE_MUTEX(autosleep_lock); +static struct wakeup_source *autosleep_ws; + +static void try_to_suspend(struct work_struct *work) +{ + unsigned int initial_count, final_count; + + if (!pm_get_wakeup_count(&initial_count, true)) + goto out; + + mutex_lock(&autosleep_lock); + + if (!pm_save_wakeup_count(initial_count)) { + mutex_unlock(&autosleep_lock); + goto out; + } + + if (autosleep_state == PM_SUSPEND_ON) { + mutex_unlock(&autosleep_lock); + return; + } + if (autosleep_state >= PM_SUSPEND_MAX) + hibernate(); + else + pm_suspend(autosleep_state); + + mutex_unlock(&autosleep_lock); + + if (!pm_get_wakeup_count(&final_count, false)) + goto out; + + /* + * If the wakeup occured for an unknown reason, wait to prevent the + * system from trying to suspend and waking up in a tight loop. + */ + if (final_count == initial_count) + schedule_timeout_uninterruptible(HZ / 2); + + out: + queue_up_suspend_work(); +} + +static DECLARE_WORK(suspend_work, try_to_suspend); + +void queue_up_suspend_work(void) +{ + if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) + queue_work(autosleep_wq, &suspend_work); +} + +suspend_state_t pm_autosleep_state(void) +{ + return autosleep_state; +} + +int pm_autosleep_lock(void) +{ + return mutex_lock_interruptible(&autosleep_lock); +} + +void pm_autosleep_unlock(void) +{ + mutex_unlock(&autosleep_lock); +} + +int pm_autosleep_set_state(suspend_state_t state) +{ + +#ifndef CONFIG_HIBERNATION + if (state >= PM_SUSPEND_MAX) + return -EINVAL; +#endif + + __pm_stay_awake(autosleep_ws); + + mutex_lock(&autosleep_lock); + + autosleep_state = state; + + __pm_relax(autosleep_ws); + + if (state > PM_SUSPEND_ON) + queue_up_suspend_work(); + + mutex_unlock(&autosleep_lock); + return 0; +} + +int __init pm_autosleep_init(void) +{ + autosleep_ws = wakeup_source_register("autosleep"); + if (!autosleep_ws) + return -ENOMEM; + + autosleep_wq = alloc_ordered_workqueue("autosleep", 0); + if (autosleep_wq) + return 0; + + wakeup_source_unregister(autosleep_ws); + return -ENOMEM; +} diff --git a/kernel/power/main.c b/kernel/power/main.c index 1c12581f1c62..ba6a5645952d 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -269,8 +269,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, return (s - buf); } -static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) +static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_STANDBY; @@ -278,27 +277,48 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, #endif char *p; int len; - int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; - /* First, check if we are requested to hibernate */ - if (len == 4 && !strncmp(buf, "disk", len)) { - error = hibernate(); - goto Exit; - } + /* Check hibernation first. */ + if (len == 4 && !strncmp(buf, "disk", len)) + return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND - for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { - if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { - error = pm_suspend(state); - break; - } - } + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + return state; #endif - Exit: + return PM_SUSPEND_ON; +} + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + state = decode_state(buf, n); + if (state < PM_SUSPEND_MAX) + error = pm_suspend(state); + else if (state == PM_SUSPEND_MAX) + error = hibernate(); + else + error = -EINVAL; + + out: + pm_autosleep_unlock(); return error ? error : n; } @@ -339,7 +359,8 @@ static ssize_t wakeup_count_show(struct kobject *kobj, { unsigned int val; - return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR; + return pm_get_wakeup_count(&val, true) ? + sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, @@ -347,15 +368,69 @@ static ssize_t wakeup_count_store(struct kobject *kobj, const char *buf, size_t n) { unsigned int val; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) - return n; + error = n; } - return -EINVAL; + + out: + pm_autosleep_unlock(); + return error; } power_attr(wakeup_count); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t autosleep_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + suspend_state_t state = pm_autosleep_state(); + + if (state == PM_SUSPEND_ON) + return sprintf(buf, "off\n"); + +#ifdef CONFIG_SUSPEND + if (state < PM_SUSPEND_MAX) + return sprintf(buf, "%s\n", valid_state(state) ? + pm_states[state] : "error"); +#endif +#ifdef CONFIG_HIBERNATION + return sprintf(buf, "disk\n"); +#else + return sprintf(buf, "error"); +#endif +} + +static ssize_t autosleep_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state = decode_state(buf, n); + int error; + + if (state == PM_SUSPEND_ON + && !(strncmp(buf, "off", 3) && strncmp(buf, "off\n", 4))) + return -EINVAL; + + error = pm_autosleep_set_state(state); + return error ? error : n; +} + +power_attr(autosleep); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -409,6 +484,9 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &autosleep_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif @@ -444,7 +522,10 @@ static int __init pm_init(void) power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; - return sysfs_create_group(power_kobj, &attr_group); + error = sysfs_create_group(power_kobj, &attr_group); + if (error) + return error; + return pm_autosleep_init(); } core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h index 98f3622d7407..4cf80fa115d9 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -264,3 +264,21 @@ static inline void suspend_thaw_processes(void) { } #endif + +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +extern int pm_autosleep_init(void); +extern int pm_autosleep_lock(void); +extern void pm_autosleep_unlock(void); +extern suspend_state_t pm_autosleep_state(void); +extern int pm_autosleep_set_state(suspend_state_t state); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline int pm_autosleep_init(void) { return 0; } +static inline int pm_autosleep_lock(void) { return 0; } +static inline void pm_autosleep_unlock(void) {} +static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } + +#endif /* !CONFIG_PM_AUTOSLEEP */ -- cgit v1.2.3 From 55850945e872531644f31fefd217d61dd15dcab8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:32 +0200 Subject: PM / Sleep: Add "prevent autosleep time" statistics to wakeup sources Android uses one wakelock statistics that is only necessary for opportunistic sleep. Namely, the prevent_suspend_time field accumulates the total time the given wakelock has been locked while "automatic suspend" was enabled. Add an analogous field, prevent_sleep_time, to wakeup sources and make it behave in a similar way. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-power | 11 +++++ drivers/base/power/sysfs.c | 24 +++++++++++ drivers/base/power/wakeup.c | 61 +++++++++++++++++++++++++-- include/linux/pm_wakeup.h | 4 ++ include/linux/suspend.h | 1 + kernel/power/autosleep.c | 6 ++- 6 files changed, 102 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index b0a5d9a6135e..45000f0db4d4 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -158,6 +158,17 @@ Description: not enabled to wake up the system from sleep states, this attribute is not present. +What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute + contains the total time the device has been preventing + opportunistic transitions to sleep states from occuring. + This attribute is read-only. If the device is not enabled to + wake up the system from sleep states, this attribute is not + present. + What: /sys/devices/.../power/autosuspend_delay_ms Date: September 2010 Contact: Alan Stern diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 13e40b9021b9..48be2ad4dd2c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -417,6 +417,27 @@ static ssize_t wakeup_last_time_show(struct device *dev, } static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444, + wakeup_prevent_sleep_time_show, NULL); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG @@ -511,6 +532,9 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &dev_attr_wakeup_prevent_sleep_time_ms.attr, +#endif #endif NULL, }; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index cf1706df7610..2595b8d8fe1f 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -380,6 +380,8 @@ static void wakeup_source_activate(struct wakeup_source *ws) ws->active = true; ws->active_count++; ws->last_time = ktime_get(); + if (ws->autosleep_enabled) + ws->start_prevent_time = ws->last_time; /* Increment the counter of events in progress. */ cec = atomic_inc_return(&combined_event_count); @@ -449,6 +451,17 @@ void pm_stay_awake(struct device *dev) } EXPORT_SYMBOL_GPL(pm_stay_awake); +#ifdef CONFIG_PM_AUTOSLEEP +static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) +{ + ktime_t delta = ktime_sub(now, ws->start_prevent_time); + ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta); +} +#else +static inline void update_prevent_sleep_time(struct wakeup_source *ws, + ktime_t now) {} +#endif + /** * wakup_source_deactivate - Mark given wakeup source as inactive. * @ws: Wakeup source to handle. @@ -490,6 +503,9 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) del_timer(&ws->timer); ws->timer_expires = 0; + if (ws->autosleep_enabled) + update_prevent_sleep_time(ws, now); + /* * Increment the counter of registered wakeup events and decrement the * couter of wakeup events in progress simultaneously. @@ -718,6 +734,34 @@ bool pm_save_wakeup_count(unsigned int count) return events_check_enabled; } +#ifdef CONFIG_PM_AUTOSLEEP +/** + * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources. + * @enabled: Whether to set or to clear the autosleep_enabled flags. + */ +void pm_wakep_autosleep_enabled(bool set) +{ + struct wakeup_source *ws; + ktime_t now = ktime_get(); + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + spin_lock_irq(&ws->lock); + if (ws->autosleep_enabled != set) { + ws->autosleep_enabled = set; + if (ws->active) { + if (set) + ws->start_prevent_time = now; + else + update_prevent_sleep_time(ws, now); + } + } + spin_unlock_irq(&ws->lock); + } + rcu_read_unlock(); +} +#endif /* CONFIG_PM_AUTOSLEEP */ + static struct dentry *wakeup_sources_stats_dentry; /** @@ -733,28 +777,37 @@ static int print_wakeup_source_stats(struct seq_file *m, ktime_t max_time; unsigned long active_count; ktime_t active_time; + ktime_t prevent_sleep_time; int ret; spin_lock_irqsave(&ws->lock, flags); total_time = ws->total_time; max_time = ws->max_time; + prevent_sleep_time = ws->prevent_sleep_time; active_count = ws->active_count; if (ws->active) { - active_time = ktime_sub(ktime_get(), ws->last_time); + ktime_t now = ktime_get(); + + active_time = ktime_sub(now, ws->last_time); total_time = ktime_add(total_time, active_time); if (active_time.tv64 > max_time.tv64) max_time = active_time; + + if (ws->autosleep_enabled) + prevent_sleep_time = ktime_add(prevent_sleep_time, + ktime_sub(now, ws->start_prevent_time)); } else { active_time = ktime_set(0, 0); } ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" - "%lld\t\t%lld\t\t%lld\t\t%lld\n", + "%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", ws->name, active_count, ws->event_count, ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), - ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); + ktime_to_ms(max_time), ktime_to_ms(ws->last_time), + ktime_to_ms(prevent_sleep_time)); spin_unlock_irqrestore(&ws->lock, flags); @@ -771,7 +824,7 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" - "last_change\n"); + "last_change\tprevent_suspend_time\n"); rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 5285317a612a..569781faa504 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -34,6 +34,7 @@ * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. * @last_time: Monotonic clock when the wakeup source's was touched last time. + * @prevent_sleep_time: Total time this source has been preventing autosleep. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. @@ -51,12 +52,15 @@ struct wakeup_source { ktime_t total_time; ktime_t max_time; ktime_t last_time; + ktime_t start_prevent_time; + ktime_t prevent_sleep_time; unsigned long event_count; unsigned long active_count; unsigned long relax_count; unsigned long expire_count; unsigned long wakeup_count; bool active:1; + bool autosleep_enabled:1; }; #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 76b7ec7d3a81..cd83059fb592 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -358,6 +358,7 @@ extern bool events_check_enabled; extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); +extern void pm_wakep_autosleep_enabled(bool set); static inline void lock_system_sleep(void) { diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index 42348e3589d3..ca304046d9e2 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -101,8 +101,12 @@ int pm_autosleep_set_state(suspend_state_t state) __pm_relax(autosleep_ws); - if (state > PM_SUSPEND_ON) + if (state > PM_SUSPEND_ON) { + pm_wakep_autosleep_enabled(true); queue_up_suspend_work(); + } else { + pm_wakep_autosleep_enabled(false); + } mutex_unlock(&autosleep_lock); return 0; -- cgit v1.2.3 From 4d7e30d98939a0340022ccd49325a3d70f7e0238 Mon Sep 17 00:00:00 2001 From: Arve Hjønnevåg Date: Tue, 1 May 2012 21:33:34 +0200 Subject: epoll: Add a flag, EPOLLWAKEUP, to prevent suspend while epoll events are ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an epoll_event, that has the EPOLLWAKEUP flag set, is ready, a wakeup_source will be active to prevent suspend. This can be used to handle wakeup events from a driver that support poll, e.g. input, if that driver wakes up the waitqueue passed to epoll before allowing suspend. Signed-off-by: Arve Hjønnevåg Reviewed-by: NeilBrown Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 90 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/capability.h | 5 ++- include/linux/eventpoll.h | 12 +++++++ 3 files changed, 103 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c0b3c70ee87a..2cf0f2153be5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,7 @@ */ /* Epoll private bits inside the event mask */ -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) +#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 @@ -154,6 +155,9 @@ struct epitem { /* List header used to link this item to the "struct file" items list */ struct list_head fllink; + /* wakeup_source used when EPOLLWAKEUP is set */ + struct wakeup_source *ws; + /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; @@ -194,6 +198,9 @@ struct eventpoll { */ struct epitem *ovflist; + /* wakeup_source used when ep_scan_ready_list is running */ + struct wakeup_source *ws; + /* The user that created the eventpoll descriptor */ struct user_struct *user; @@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep, * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after @@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * Quickly re-inject items left on "txlist". */ list_splice(&txlist, &ep->rdllist); + __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { /* @@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); free_uid(ep->user); + wakeup_source_unregister(ep->ws); kfree(ep); } @@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ + __pm_relax(epi->ws); list_del_init(&epi->rdllink); } } @@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; + if (epi->ws) { + /* + * Activate ep->ws since epi->ws may get + * deactivated at any time. + */ + __pm_stay_awake(ep->ws); + } + } goto out_unlock; } /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1091,6 +1115,30 @@ static int reverse_path_check(void) return error; } +static int ep_create_wakeup_source(struct epitem *epi) +{ + const char *name; + + if (!epi->ep->ws) { + epi->ep->ws = wakeup_source_register("eventpoll"); + if (!epi->ep->ws) + return -ENOMEM; + } + + name = epi->ffd.file->f_path.dentry->d_name.name; + epi->ws = wakeup_source_register(name); + if (!epi->ws) + return -ENOMEM; + + return 0; +} + +static void ep_destroy_wakeup_source(struct epitem *epi) +{ + wakeup_source_unregister(epi->ws); + epi->ws = NULL; +} + /* * Must be called with "mtx" held. */ @@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, epi->event = *event; epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; + if (epi->event.events & EPOLLWAKEUP) { + error = ep_create_wakeup_source(epi); + if (error) + goto error_create_wakeup_source; + } else { + epi->ws = NULL; + } /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1204,6 +1260,9 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + +error_create_wakeup_source: kmem_cache_free(epi_cache, epi); return error; @@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even epi->event.events = event->events; pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ + if (epi->event.events & EPOLLWAKEUP) { + if (!epi->ws) + ep_create_wakeup_source(epi); + } else if (epi->ws) { + ep_destroy_wakeup_source(epi); + } /* * Get current event bits. We can safely use the file* here because @@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, !list_empty(head) && eventcnt < esed->maxevents;) { epi = list_first_entry(head, struct epitem, rdllink); + /* + * Activate ep->ws before deactivating epi->ws to prevent + * triggering auto-suspend here (in case we reactive epi->ws + * below). + * + * This could be rearranged to delay the deactivation of epi->ws + * instead, but then epi->ws would temporarily be out of sync + * with ep_is_linked(). + */ + if (epi->ws && epi->ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(epi->ws); list_del_init(&epi->rdllink); pt._key = epi->event.events; @@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); + __pm_stay_awake(epi->ws); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); } } } @@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, if (!tfile->f_op || !tfile->f_op->poll) goto error_tgt_fput; + /* Check if EPOLLWAKEUP is allowed */ + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + goto error_tgt_fput; + /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit diff --git a/include/linux/capability.h b/include/linux/capability.h index 12d52dedb229..c398cff3dab7 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,8 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 +/* Allow preventing system suspends while epoll events are pending */ -#define CAP_LAST_CAP CAP_WAKE_ALARM +#define CAP_EPOLLWAKEUP 36 + +#define CAP_LAST_CAP CAP_EPOLLWAKEUP #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 657ab55beda0..6f8be328770a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -26,6 +26,18 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* + * Request the handling of system wakeup events so as to prevent system suspends + * from happening while those events are being processed. + * + * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be + * re-allowed until epoll_wait is called again after consuming the wakeup + * event(s). + * + * Requires CAP_EPOLLWAKEUP + */ +#define EPOLLWAKEUP (1 << 29) + /* Set the One Shot behaviour for the target file descriptor */ #define EPOLLONESHOT (1 << 30) -- cgit v1.2.3