From 0b7c328fd3ef253c04854dab16c6dd0797963637 Mon Sep 17 00:00:00 2001 From: Xiaoguang Chen Date: Sun, 29 Apr 2012 22:51:31 +0200 Subject: PM: devfreq: init performance/powersave governor Performance and powersave governor's get_target_freq is not called if driver chooses one of these two governors. Add init function in governor profile to call update_devfreq which will call get_target_freq subsequently. Signed-off-by: Xiaoguang Chen Acked-by: MyungJoo Ham Signed-off-by: Rafael J. Wysocki --- drivers/devfreq/governor_performance.c | 7 +++++++ drivers/devfreq/governor_powersave.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c index 574a06b1b1de..af75ddd4f158 100644 --- a/drivers/devfreq/governor_performance.c +++ b/drivers/devfreq/governor_performance.c @@ -10,6 +10,7 @@ */ #include +#include "governor.h" static int devfreq_performance_func(struct devfreq *df, unsigned long *freq) @@ -25,8 +26,14 @@ static int devfreq_performance_func(struct devfreq *df, return 0; } +static int performance_init(struct devfreq *devfreq) +{ + return update_devfreq(devfreq); +} + const struct devfreq_governor devfreq_performance = { .name = "performance", + .init = performance_init, .get_target_freq = devfreq_performance_func, .no_central_polling = true, }; diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c index d742d4a82d6a..fec0cdbd2477 100644 --- a/drivers/devfreq/governor_powersave.c +++ b/drivers/devfreq/governor_powersave.c @@ -10,6 +10,7 @@ */ #include +#include "governor.h" static int devfreq_powersave_func(struct devfreq *df, unsigned long *freq) @@ -22,8 +23,14 @@ static int devfreq_powersave_func(struct devfreq *df, return 0; } +static int powersave_init(struct devfreq *devfreq) +{ + return update_devfreq(devfreq); +} + const struct devfreq_governor devfreq_powersave = { .name = "powersave", + .init = powersave_init, .get_target_freq = 
devfreq_powersave_func, .no_central_polling = true, }; -- cgit v1.2.3 From 5a21d489fd9541a4a66b9a500659abaca1b19a51 Mon Sep 17 00:00:00 2001 From: Bojan Smojver Date: Sun, 29 Apr 2012 22:42:06 +0200 Subject: PM / Hibernate: Hibernate/thaw fixes/improvements 1. Do not allocate memory for buffers from emergency pools, unless absolutely required. Do not warn about and do not retry non-essential failed allocations. 2. Do not check the amount of free pages left on every single page write, but wait until one map is completely populated and then check. 3. Set maximum number of pages for read buffering consistently, instead of inadvertently depending on the size of the sector type. 4. Fix copyright line, which I missed when I submitted the hibernation threading patch. 5. Dispense with bit shifting arithmetic to improve readability. 6. Really recalculate the number of pages required to be free after all allocations have been done. 7. Fix calculation of pages required for read buffering. Only count in pages that do not belong to high memory. Signed-off-by: Bojan Smojver Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 62 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index eef311a58a64..11e22c068e8b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -6,7 +6,7 @@ * * Copyright (C) 1998,2001-2005 Pavel Machek * Copyright (C) 2006 Rafael J. Wysocki - * Copyright (C) 2010 Bojan Smojver + * Copyright (C) 2010-2012 Bojan Smojver * * This file is released under the GPLv2. 
* @@ -282,14 +282,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) return -ENOSPC; if (bio_chain) { - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | + __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { @@ -367,12 +370,17 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - } - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); - if (error) - goto out; - handle->reqd_free_pages = reqd_free_pages(); + + if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + /* + * Recalculate the number of required free pages, to + * make sure we never take more than half. + */ + handle->reqd_free_pages = reqd_free_pages(); + } } out: return error; @@ -419,8 +427,9 @@ static int swap_writer_finish(struct swap_map_handle *handle, /* Maximum number of threads for compression/decompression. */ #define LZO_THREADS 3 -/* Maximum number of pages for read buffering. */ -#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) +/* Minimum/maximum number of pages for read buffering. */ +#define LZO_MIN_RD_PAGES 1024 +#define LZO_MAX_RD_PAGES 8192 /** @@ -630,12 +639,6 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - /* - * Adjust number of free pages after all allocations have been done. - * We don't want to run out of pages when writing. - */ - handle->reqd_free_pages = reqd_free_pages(); - /* * Start the CRC32 thread. 
*/ @@ -657,6 +660,12 @@ static int save_image_lzo(struct swap_map_handle *handle, goto out_clean; } + /* + * Adjust the number of required free pages after all allocations have + * been done. We don't want to run out of pages when writing. + */ + handle->reqd_free_pages = reqd_free_pages(); + printk(KERN_INFO "PM: Using %u thread(s) for compression.\n" "PM: Compressing and saving image data (%u pages) ... ", @@ -1067,7 +1076,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned i, thr, run_threads, nr_threads; unsigned ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0; - unsigned long read_pages; + unsigned long read_pages = 0; unsigned char **page = NULL; struct dec_data *data = NULL; struct crc_data *crc = NULL; @@ -1079,7 +1088,7 @@ static int load_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = vmalloc(sizeof(*page) * LZO_READ_PAGES); + page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -1144,15 +1153,22 @@ static int load_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of pages for read buffering, in case we are short. + * Set the number of pages for read buffering. + * This is complete guesswork, because we'll only know the real + * picture once prepare_image() is called, which is much later on + * during the image load phase. We'll assume the worst case and + * say that none of the image pages are from high memory. */ - read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; - read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); + if (low_free_pages() > snapshot_get_image_size()) + read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; + read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? 
__GFP_WAIT | __GFP_HIGH : - __GFP_WAIT); + __GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + if (!page[i]) { if (i < LZO_CMP_PAGES) { ring_size = i; -- cgit v1.2.3 From 52d136cc2cf6659ee247dbcc88c9e7bd7428ad06 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:52:19 +0200 Subject: PM / Sleep: Look for wakeup events in later stages of device suspend Currently, the device suspend code in drivers/base/power/main.c only checks if there have been any wakeup events, and therefore the ongoing system transition to a sleep state should be aborted, during the first (i.e. "suspend") device suspend phase. However, wakeup events may be reported later as well, so it's reasonable to look for them in the subsequent (i.e. "late suspend" and "suspend noirq") phases. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- drivers/base/power/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b462c0e341cb..e0fb5b0435a3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -889,6 +889,11 @@ static int dpm_suspend_noirq(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_noirq_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) @@ -962,6 +967,11 @@ static int dpm_suspend_late(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_late_early_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) -- cgit v1.2.3 From 60af1066913162c5dd13fad3b872a67b1eb7da0f Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 29 Apr 2012 22:52:34 +0200 Subject: PM / Sleep: Use wait queue to signal "no wakeup events in progress" The current wakeup source deactivation code doesn't do anything when the counter of wakeup events in progress goes down to zero, which requires pm_get_wakeup_count() to poll that counter periodically. Although this reduces the average time it takes to deactivate a wakeup source, it also may lead to a substantial amount of unnecessary polling if there are extended periods of wakeup activity. Thus it seems reasonable to use a wait queue for signaling the "no wakeup events in progress" condition and remove the polling. Signed-off-by: Rafael J. Wysocki Acked-by: mark gross Acked-by: Greg Kroah-Hartman --- drivers/base/power/wakeup.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2a3e581b8dcd..92f220d89d35 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -17,8 +17,6 @@ #include "power.h" -#define TIMEOUT 100 - /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. @@ -52,6 +50,8 @@ static void pm_wakeup_timer_fn(unsigned long data); static LIST_HEAD(wakeup_sources); +static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); + /** * wakeup_source_prepare - Prepare a new wakeup source for initialization. * @ws: Wakeup source to prepare. @@ -442,6 +442,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake); */ static void wakeup_source_deactivate(struct wakeup_source *ws) { + unsigned int cnt, inpr; ktime_t duration; ktime_t now; @@ -476,6 +477,10 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) * couter of wakeup events in progress simultaneously. 
*/ atomic_add(MAX_IN_PROGRESS, &combined_event_count); + + split_counters(&cnt, &inpr); + if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) + wake_up(&wakeup_count_wait_queue); } /** @@ -667,14 +672,19 @@ bool pm_wakeup_pending(void) bool pm_get_wakeup_count(unsigned int *count) { unsigned int cnt, inpr; + DEFINE_WAIT(wait); for (;;) { + prepare_to_wait(&wakeup_count_wait_queue, &wait, + TASK_INTERRUPTIBLE); split_counters(&cnt, &inpr); if (inpr == 0 || signal_pending(current)) break; pm_wakeup_update_hit_counts(); - schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); + + schedule(); } + finish_wait(&wakeup_count_wait_queue, &wait); split_counters(&cnt, &inpr); *count = cnt; -- cgit v1.2.3 From 30e3ce6dcbe3fc29c343b17e768b07d4a795de21 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:52:52 +0200 Subject: PM / Sleep: Change wakeup source statistics to follow Android Wakeup statistics used by Android are slightly different from what we have in wakeup sources at the moment and there aren't any known users of those statistics other than Android, so modify them to make it easier for Android to switch to wakeup sources. This removes the struct wakeup_source's hit_count field, which is very rough and therefore not very useful, and adds two new fields, wakeup_count and expire_count. The first one tracks how many times the wakeup source is activated with events_check_enabled set (which roughly corresponds to the situations when a system power transition to a sleep state is in progress and would be aborted by this wakeup source if it were the only active one at that time) and the second one is the number of times the wakeup source has been activated with a timeout that expired. Additionally, the last_time field is now updated when the wakeup source is deactivated too (previously it was only updated during the wakeup source's activation), which seems to be what Android does with the analogous counter for wakelocks. 
Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-power | 24 +++++++--- drivers/base/power/sysfs.c | 30 ++++++++++--- drivers/base/power/wakeup.c | 64 ++++++++++++--------------- include/linux/pm_wakeup.h | 11 +++-- 4 files changed, 77 insertions(+), 52 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 840f7d64d483..b0a5d9a6135e 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -96,16 +96,26 @@ Description: is read-only. If the device is not enabled to wake up the system from sleep states, this attribute is not present. -What: /sys/devices/.../power/wakeup_hit_count -Date: September 2010 +What: /sys/devices/.../power/wakeup_abort_count +Date: February 2012 Contact: Rafael J. Wysocki Description: - The /sys/devices/.../wakeup_hit_count attribute contains the + The /sys/devices/.../wakeup_abort_count attribute contains the number of times the processing of a wakeup event associated with - the device might prevent the system from entering a sleep state. - This attribute is read-only. If the device is not enabled to - wake up the system from sleep states, this attribute is not - present. + the device might have aborted system transition into a sleep + state in progress. This attribute is read-only. If the device + is not enabled to wake up the system from sleep states, this + attribute is not present. + +What: /sys/devices/.../power/wakeup_expire_count +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_expire_count attribute contains the + number of times a wakeup event associated with the device has + been reported with a timeout that expired. This attribute is + read-only. If the device is not enabled to wake up the system + from sleep states, this attribute is not present. 
What: /sys/devices/.../power/wakeup_active Date: September 2010 diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 95c12f6cb5b9..13e40b9021b9 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -314,22 +314,41 @@ static ssize_t wakeup_active_count_show(struct device *dev, static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL); -static ssize_t wakeup_hit_count_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_abort_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long count = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->wakeup_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL); + +static ssize_t wakeup_expire_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) { unsigned long count = 0; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { - count = dev->power.wakeup->hit_count; + count = dev->power.wakeup->expire_count; enabled = true; } spin_unlock_irq(&dev->power.lock); return enabled ? 
sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL); +static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -486,7 +505,8 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, - &dev_attr_wakeup_hit_count.attr, + &dev_attr_wakeup_abort_count.attr, + &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 92f220d89d35..7a6eada4534d 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -21,7 +21,7 @@ * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. */ -bool events_check_enabled; +bool events_check_enabled __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -382,6 +382,21 @@ static void wakeup_source_activate(struct wakeup_source *ws) atomic_inc(&combined_event_count); } +/** + * wakeup_source_report_event - Report wakeup event using the given source. + * @ws: Wakeup source to report the event for. + */ +static void wakeup_source_report_event(struct wakeup_source *ws) +{ + ws->event_count++; + /* This is racy, but the counter is approximate anyway. */ + if (events_check_enabled) + ws->wakeup_count++; + + if (!ws->active) + wakeup_source_activate(ws); +} + /** * __pm_stay_awake - Notify the PM core of a wakeup event. * @ws: Wakeup source object associated with the source of the event. 
@@ -397,10 +412,7 @@ void __pm_stay_awake(struct wakeup_source *ws) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); - + wakeup_source_report_event(ws); del_timer(&ws->timer); ws->timer_expires = 0; @@ -469,6 +481,7 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time)) ws->max_time = duration; + ws->last_time = now; del_timer(&ws->timer); ws->timer_expires = 0; @@ -541,8 +554,10 @@ static void pm_wakeup_timer_fn(unsigned long data) spin_lock_irqsave(&ws->lock, flags); if (ws->active && ws->timer_expires - && time_after_eq(jiffies, ws->timer_expires)) + && time_after_eq(jiffies, ws->timer_expires)) { wakeup_source_deactivate(ws); + ws->expire_count++; + } spin_unlock_irqrestore(&ws->lock, flags); } @@ -569,9 +584,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); + wakeup_source_report_event(ws); if (!msec) { wakeup_source_deactivate(ws); @@ -613,24 +626,6 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) } EXPORT_SYMBOL_GPL(pm_wakeup_event); -/** - * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources. - */ -static void pm_wakeup_update_hit_counts(void) -{ - unsigned long flags; - struct wakeup_source *ws; - - rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - spin_lock_irqsave(&ws->lock, flags); - if (ws->active) - ws->hit_count++; - spin_unlock_irqrestore(&ws->lock, flags); - } - rcu_read_unlock(); -} - /** * pm_wakeup_pending - Check if power transition in progress should be aborted. 
* @@ -653,8 +648,6 @@ bool pm_wakeup_pending(void) events_check_enabled = !ret; } spin_unlock_irqrestore(&events_lock, flags); - if (ret) - pm_wakeup_update_hit_counts(); return ret; } @@ -680,7 +673,6 @@ bool pm_get_wakeup_count(unsigned int *count) split_counters(&cnt, &inpr); if (inpr == 0 || signal_pending(current)) break; - pm_wakeup_update_hit_counts(); schedule(); } @@ -713,8 +705,6 @@ bool pm_save_wakeup_count(unsigned int count) events_check_enabled = true; } spin_unlock_irq(&events_lock); - if (!events_check_enabled) - pm_wakeup_update_hit_counts(); return events_check_enabled; } @@ -749,9 +739,10 @@ static int print_wakeup_source_stats(struct seq_file *m, active_time = ktime_set(0, 0); } - ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t" + ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" "%lld\t\t%lld\t\t%lld\t\t%lld\n", - ws->name, active_count, ws->event_count, ws->hit_count, + ws->name, active_count, ws->event_count, + ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); @@ -768,8 +759,9 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { struct wakeup_source *ws; - seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t" - "active_since\ttotal_time\tmax_time\tlast_change\n"); + seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + "expire_count\tactive_since\ttotal_time\tmax_time\t" + "last_change\n"); rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index d9f05113e5fb..5285317a612a 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -33,12 +33,14 @@ * * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. - * @last_time: Monotonic clock when the wakeup source's was activated last time. 
+ * @last_time: Monotonic clock when the wakeup source's was touched last time. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. - * @hit_count: Number of times the wakeup sorce might abort system suspend. + * @expire_count: Number of times the wakeup source's timeout has expired. + * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. + * @has_timeout: The wakeup source has been activated with a timeout. */ struct wakeup_source { const char *name; @@ -52,8 +54,9 @@ struct wakeup_source { unsigned long event_count; unsigned long active_count; unsigned long relax_count; - unsigned long hit_count; - unsigned int active:1; + unsigned long expire_count; + unsigned long wakeup_count; + bool active:1; }; #ifdef CONFIG_PM_SLEEP -- cgit v1.2.3 From 6791e36c4a40e8930e08669e60077eea6770c429 Mon Sep 17 00:00:00 2001 From: Arve Hjønnevåg Date: Sun, 29 Apr 2012 22:53:02 +0200 Subject: PM / Sleep: Add wakeup_source_activate and wakeup_source_deactivate tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints to wakeup_source_activate and wakeup_source_deactivate. Useful for checking that specific wakeup sources overlap as expected. Signed-off-by: Arve Hjønnevåg Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/wakeup.c | 12 +++++++++--- include/trace/events/power.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7a6eada4534d..1132799421cd 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "power.h" @@ -374,12 +375,16 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_enable); */ static void wakeup_source_activate(struct wakeup_source *ws) { + unsigned int cec; + ws->active = true; ws->active_count++; ws->last_time = ktime_get(); /* Increment the counter of events in progress. */ - atomic_inc(&combined_event_count); + cec = atomic_inc_return(&combined_event_count); + + trace_wakeup_source_activate(ws->name, cec); } /** @@ -454,7 +459,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake); */ static void wakeup_source_deactivate(struct wakeup_source *ws) { - unsigned int cnt, inpr; + unsigned int cnt, inpr, cec; ktime_t duration; ktime_t now; @@ -489,7 +494,8 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) * Increment the counter of registered wakeup events and decrement the * couter of wakeup events in progress simultaneously. 
*/ - atomic_add(MAX_IN_PROGRESS, &combined_event_count); + cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); + trace_wakeup_source_deactivate(ws->name, cec); split_counters(&cnt, &inpr); if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) diff --git a/include/trace/events/power.h b/include/trace/events/power.h index cae9a94f025d..0c9783841a30 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -65,6 +65,40 @@ TRACE_EVENT(machine_suspend, TP_printk("state=%lu", (unsigned long)__entry->state) ); +DECLARE_EVENT_CLASS(wakeup_source, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + ), + + TP_printk("%s state=0x%lx", __get_str(name), + (unsigned long)__entry->state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_activate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + #ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED /* -- cgit v1.2.3 From 7483b4a4d9abf9dcf1ffe6e805ead2847ec3264e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:22 +0200 Subject: PM / Sleep: Implement opportunistic sleep, v2 Introduce a mechanism by which the kernel can trigger global transitions to a sleep state chosen by user space if there are no active wakeup sources. It consists of a new sysfs attribute, /sys/power/autosleep, that can be written one of the strings returned by reads from /sys/power/state, an ordered workqueue and a work item carrying out the "suspend" operations. 
If a string representing the system's sleep state is written to /sys/power/autosleep, the work item triggering transitions to that state is queued up and it requeues itself after every execution until user space writes "off" to /sys/power/autosleep. That work item enables the detection of wakeup events using the functions already defined in drivers/base/power/wakeup.c (with one small modification) and calls either pm_suspend(), or hibernate() to put the system into a sleep state. If a wakeup event is reported while the transition is in progress, it will abort the transition and the "system suspend" work item will be queued up again. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: NeilBrown --- Documentation/ABI/testing/sysfs-power | 17 +++++ drivers/base/power/wakeup.c | 34 +++++----- include/linux/suspend.h | 13 +++- kernel/power/Kconfig | 8 +++ kernel/power/Makefile | 1 + kernel/power/autosleep.c | 123 ++++++++++++++++++++++++++++++++++ kernel/power/main.c | 119 ++++++++++++++++++++++++++------ kernel/power/power.h | 18 +++++ 8 files changed, 298 insertions(+), 35 deletions(-) create mode 100644 kernel/power/autosleep.c diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index b464d12761ba..237c735db6c9 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -172,3 +172,20 @@ Description: Reading from this file will display the current value, which is set to 1 MB by default. + +What: /sys/power/autosleep +Date: April 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/power/autosleep file can be written one of the strings + returned by reads from /sys/power/state. If that happens, a + work item attempting to trigger a transition of the system to + the sleep state represented by that string is queued up. This + attempt will only succeed if there are no active wakeup sources + in the system at that time. 
After every execution, regardless + of whether or not the attempt to put the system to sleep has + succeeded, the work item requeues itself until user space + writes "off" to /sys/power/autosleep. + + Reading from this file causes the last string successfully + written to it to be returned. diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 1132799421cd..cf1706df7610 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -660,29 +660,33 @@ bool pm_wakeup_pending(void) /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. + * @block: Whether or not to block. * - * Store the number of registered wakeup events at the address in @count. Block - * if the current number of wakeup events being processed is nonzero. + * Store the number of registered wakeup events at the address in @count. If + * @block is set, block until the current number of wakeup events being + * processed is zero. * - * Return 'false' if the wait for the number of wakeup events being processed to - * drop down to zero has been interrupted by a signal (and the current number - * of wakeup events being processed is still nonzero). Otherwise return 'true'. + * Return 'false' if the current number of wakeup events being processed is + * nonzero. Otherwise return 'true'. 
*/ -bool pm_get_wakeup_count(unsigned int *count) +bool pm_get_wakeup_count(unsigned int *count, bool block) { unsigned int cnt, inpr; - DEFINE_WAIT(wait); - for (;;) { - prepare_to_wait(&wakeup_count_wait_queue, &wait, - TASK_INTERRUPTIBLE); - split_counters(&cnt, &inpr); - if (inpr == 0 || signal_pending(current)) - break; + if (block) { + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&wakeup_count_wait_queue, &wait, + TASK_INTERRUPTIBLE); + split_counters(&cnt, &inpr); + if (inpr == 0 || signal_pending(current)) + break; - schedule(); + schedule(); + } + finish_wait(&wakeup_count_wait_queue, &wait); } - finish_wait(&wakeup_count_wait_queue, &wait); split_counters(&cnt, &inpr); *count = cnt; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ac1c114c499d..76b7ec7d3a81 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -356,7 +356,7 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); -extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); static inline void lock_system_sleep(void) @@ -407,6 +407,17 @@ static inline void unlock_system_sleep(void) {} #endif /* !CONFIG_PM_SLEEP */ +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +void queue_up_suspend_work(void); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline void queue_up_suspend_work(void) {} + +#endif /* !CONFIG_PM_AUTOSLEEP */ + #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index deb5461e3216..67947083f842 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -103,6 +103,14 @@ config PM_SLEEP_SMP select HOTPLUG select HOTPLUG_CPU +config PM_AUTOSLEEP + bool "Opportunistic sleep" + depends on PM_SLEEP + default n + ---help--- + 
Allow the kernel to trigger a system transition into a global sleep + state automatically whenever there are no active wakeup sources. + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 66d808ec5252..010b2f7e148c 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -9,5 +9,6 @@ obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o +obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c new file mode 100644 index 000000000000..42348e3589d3 --- /dev/null +++ b/kernel/power/autosleep.c @@ -0,0 +1,123 @@ +/* + * kernel/power/autosleep.c + * + * Opportunistic sleep support. + * + * Copyright (C) 2012 Rafael J. Wysocki + */ + +#include +#include +#include + +#include "power.h" + +static suspend_state_t autosleep_state; +static struct workqueue_struct *autosleep_wq; +/* + * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source + * is active, otherwise a deadlock with try_to_suspend() is possible. + * Alternatively mutex_lock_interruptible() can be used. This will then fail + * if an auto_sleep cycle tries to freeze processes. 
+ */ +static DEFINE_MUTEX(autosleep_lock); +static struct wakeup_source *autosleep_ws; + +static void try_to_suspend(struct work_struct *work) +{ + unsigned int initial_count, final_count; + + if (!pm_get_wakeup_count(&initial_count, true)) + goto out; + + mutex_lock(&autosleep_lock); + + if (!pm_save_wakeup_count(initial_count)) { + mutex_unlock(&autosleep_lock); + goto out; + } + + if (autosleep_state == PM_SUSPEND_ON) { + mutex_unlock(&autosleep_lock); + return; + } + if (autosleep_state >= PM_SUSPEND_MAX) + hibernate(); + else + pm_suspend(autosleep_state); + + mutex_unlock(&autosleep_lock); + + if (!pm_get_wakeup_count(&final_count, false)) + goto out; + + /* + * If the wakeup occured for an unknown reason, wait to prevent the + * system from trying to suspend and waking up in a tight loop. + */ + if (final_count == initial_count) + schedule_timeout_uninterruptible(HZ / 2); + + out: + queue_up_suspend_work(); +} + +static DECLARE_WORK(suspend_work, try_to_suspend); + +void queue_up_suspend_work(void) +{ + if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) + queue_work(autosleep_wq, &suspend_work); +} + +suspend_state_t pm_autosleep_state(void) +{ + return autosleep_state; +} + +int pm_autosleep_lock(void) +{ + return mutex_lock_interruptible(&autosleep_lock); +} + +void pm_autosleep_unlock(void) +{ + mutex_unlock(&autosleep_lock); +} + +int pm_autosleep_set_state(suspend_state_t state) +{ + +#ifndef CONFIG_HIBERNATION + if (state >= PM_SUSPEND_MAX) + return -EINVAL; +#endif + + __pm_stay_awake(autosleep_ws); + + mutex_lock(&autosleep_lock); + + autosleep_state = state; + + __pm_relax(autosleep_ws); + + if (state > PM_SUSPEND_ON) + queue_up_suspend_work(); + + mutex_unlock(&autosleep_lock); + return 0; +} + +int __init pm_autosleep_init(void) +{ + autosleep_ws = wakeup_source_register("autosleep"); + if (!autosleep_ws) + return -ENOMEM; + + autosleep_wq = alloc_ordered_workqueue("autosleep", 0); + if (autosleep_wq) + return 0; + + 
wakeup_source_unregister(autosleep_ws); + return -ENOMEM; +} diff --git a/kernel/power/main.c b/kernel/power/main.c index 1c12581f1c62..ba6a5645952d 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -269,8 +269,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, return (s - buf); } -static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) +static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_STANDBY; @@ -278,27 +277,48 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, #endif char *p; int len; - int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; - /* First, check if we are requested to hibernate */ - if (len == 4 && !strncmp(buf, "disk", len)) { - error = hibernate(); - goto Exit; - } + /* Check hibernation first. */ + if (len == 4 && !strncmp(buf, "disk", len)) + return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND - for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { - if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { - error = pm_suspend(state); - break; - } - } + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + return state; #endif - Exit: + return PM_SUSPEND_ON; +} + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + state = decode_state(buf, n); + if (state < PM_SUSPEND_MAX) + error = pm_suspend(state); + else if (state == PM_SUSPEND_MAX) + error = hibernate(); + else + error = -EINVAL; + + out: + pm_autosleep_unlock(); return error ? 
error : n; } @@ -339,7 +359,8 @@ static ssize_t wakeup_count_show(struct kobject *kobj, { unsigned int val; - return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR; + return pm_get_wakeup_count(&val, true) ? + sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, @@ -347,15 +368,69 @@ static ssize_t wakeup_count_store(struct kobject *kobj, const char *buf, size_t n) { unsigned int val; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) - return n; + error = n; } - return -EINVAL; + + out: + pm_autosleep_unlock(); + return error; } power_attr(wakeup_count); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t autosleep_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + suspend_state_t state = pm_autosleep_state(); + + if (state == PM_SUSPEND_ON) + return sprintf(buf, "off\n"); + +#ifdef CONFIG_SUSPEND + if (state < PM_SUSPEND_MAX) + return sprintf(buf, "%s\n", valid_state(state) ? + pm_states[state] : "error"); +#endif +#ifdef CONFIG_HIBERNATION + return sprintf(buf, "disk\n"); +#else + return sprintf(buf, "error"); +#endif +} + +static ssize_t autosleep_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state = decode_state(buf, n); + int error; + + if (state == PM_SUSPEND_ON + && !(strncmp(buf, "off", 3) && strncmp(buf, "off\n", 4))) + return -EINVAL; + + error = pm_autosleep_set_state(state); + return error ? 
error : n; +} + +power_attr(autosleep); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -409,6 +484,9 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &autosleep_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif @@ -444,7 +522,10 @@ static int __init pm_init(void) power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; - return sysfs_create_group(power_kobj, &attr_group); + error = sysfs_create_group(power_kobj, &attr_group); + if (error) + return error; + return pm_autosleep_init(); } core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h index 98f3622d7407..4cf80fa115d9 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -264,3 +264,21 @@ static inline void suspend_thaw_processes(void) { } #endif + +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +extern int pm_autosleep_init(void); +extern int pm_autosleep_lock(void); +extern void pm_autosleep_unlock(void); +extern suspend_state_t pm_autosleep_state(void); +extern int pm_autosleep_set_state(suspend_state_t state); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline int pm_autosleep_init(void) { return 0; } +static inline int pm_autosleep_lock(void) { return 0; } +static inline void pm_autosleep_unlock(void) {} +static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } + +#endif /* !CONFIG_PM_AUTOSLEEP */ -- cgit v1.2.3 From 55850945e872531644f31fefd217d61dd15dcab8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:32 +0200 Subject: PM / Sleep: Add "prevent autosleep time" statistics to wakeup sources Android uses one wakelock statistics that is only necessary for opportunistic sleep. Namely, the prevent_suspend_time field accumulates the total time the given wakelock has been locked while "automatic suspend" was enabled. 
Add an analogous field, prevent_sleep_time, to wakeup sources and make it behave in a similar way. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-power | 11 +++++ drivers/base/power/sysfs.c | 24 +++++++++++ drivers/base/power/wakeup.c | 61 +++++++++++++++++++++++++-- include/linux/pm_wakeup.h | 4 ++ include/linux/suspend.h | 1 + kernel/power/autosleep.c | 6 ++- 6 files changed, 102 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index b0a5d9a6135e..45000f0db4d4 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -158,6 +158,17 @@ Description: not enabled to wake up the system from sleep states, this attribute is not present. +What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute + contains the total time the device has been preventing + opportunistic transitions to sleep states from occurring. + This attribute is read-only. If the device is not enabled to + wake up the system from sleep states, this attribute is not + present.
+ What: /sys/devices/.../power/autosuspend_delay_ms Date: September 2010 Contact: Alan Stern diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 13e40b9021b9..48be2ad4dd2c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -417,6 +417,27 @@ static ssize_t wakeup_last_time_show(struct device *dev, } static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444, + wakeup_prevent_sleep_time_show, NULL); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG @@ -511,6 +532,9 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &dev_attr_wakeup_prevent_sleep_time_ms.attr, +#endif #endif NULL, }; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index cf1706df7610..2595b8d8fe1f 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -380,6 +380,8 @@ static void wakeup_source_activate(struct wakeup_source *ws) ws->active = true; ws->active_count++; ws->last_time = ktime_get(); + if (ws->autosleep_enabled) + ws->start_prevent_time = ws->last_time; /* Increment the counter of events in progress. 
*/ cec = atomic_inc_return(&combined_event_count); @@ -449,6 +451,17 @@ void pm_stay_awake(struct device *dev) } EXPORT_SYMBOL_GPL(pm_stay_awake); +#ifdef CONFIG_PM_AUTOSLEEP +static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) +{ + ktime_t delta = ktime_sub(now, ws->start_prevent_time); + ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta); +} +#else +static inline void update_prevent_sleep_time(struct wakeup_source *ws, + ktime_t now) {} +#endif + /** * wakup_source_deactivate - Mark given wakeup source as inactive. * @ws: Wakeup source to handle. @@ -490,6 +503,9 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) del_timer(&ws->timer); ws->timer_expires = 0; + if (ws->autosleep_enabled) + update_prevent_sleep_time(ws, now); + /* * Increment the counter of registered wakeup events and decrement the * couter of wakeup events in progress simultaneously. @@ -718,6 +734,34 @@ bool pm_save_wakeup_count(unsigned int count) return events_check_enabled; } +#ifdef CONFIG_PM_AUTOSLEEP +/** + * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources. + * @enabled: Whether to set or to clear the autosleep_enabled flags. 
+ */ +void pm_wakep_autosleep_enabled(bool set) +{ + struct wakeup_source *ws; + ktime_t now = ktime_get(); + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + spin_lock_irq(&ws->lock); + if (ws->autosleep_enabled != set) { + ws->autosleep_enabled = set; + if (ws->active) { + if (set) + ws->start_prevent_time = now; + else + update_prevent_sleep_time(ws, now); + } + } + spin_unlock_irq(&ws->lock); + } + rcu_read_unlock(); +} +#endif /* CONFIG_PM_AUTOSLEEP */ + static struct dentry *wakeup_sources_stats_dentry; /** @@ -733,28 +777,37 @@ static int print_wakeup_source_stats(struct seq_file *m, ktime_t max_time; unsigned long active_count; ktime_t active_time; + ktime_t prevent_sleep_time; int ret; spin_lock_irqsave(&ws->lock, flags); total_time = ws->total_time; max_time = ws->max_time; + prevent_sleep_time = ws->prevent_sleep_time; active_count = ws->active_count; if (ws->active) { - active_time = ktime_sub(ktime_get(), ws->last_time); + ktime_t now = ktime_get(); + + active_time = ktime_sub(now, ws->last_time); total_time = ktime_add(total_time, active_time); if (active_time.tv64 > max_time.tv64) max_time = active_time; + + if (ws->autosleep_enabled) + prevent_sleep_time = ktime_add(prevent_sleep_time, + ktime_sub(now, ws->start_prevent_time)); } else { active_time = ktime_set(0, 0); } ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" - "%lld\t\t%lld\t\t%lld\t\t%lld\n", + "%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", ws->name, active_count, ws->event_count, ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), - ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); + ktime_to_ms(max_time), ktime_to_ms(ws->last_time), + ktime_to_ms(prevent_sleep_time)); spin_unlock_irqrestore(&ws->lock, flags); @@ -771,7 +824,7 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" "expire_count\tactive_since\ttotal_time\tmax_time\t" - 
"last_change\n"); + "last_change\tprevent_suspend_time\n"); rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 5285317a612a..569781faa504 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -34,6 +34,7 @@ * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. * @last_time: Monotonic clock when the wakeup source's was touched last time. + * @prevent_sleep_time: Total time this source has been preventing autosleep. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. @@ -51,12 +52,15 @@ struct wakeup_source { ktime_t total_time; ktime_t max_time; ktime_t last_time; + ktime_t start_prevent_time; + ktime_t prevent_sleep_time; unsigned long event_count; unsigned long active_count; unsigned long relax_count; unsigned long expire_count; unsigned long wakeup_count; bool active:1; + bool autosleep_enabled:1; }; #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 76b7ec7d3a81..cd83059fb592 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -358,6 +358,7 @@ extern bool events_check_enabled; extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); +extern void pm_wakep_autosleep_enabled(bool set); static inline void lock_system_sleep(void) { diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index 42348e3589d3..ca304046d9e2 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -101,8 +101,12 @@ int pm_autosleep_set_state(suspend_state_t state) __pm_relax(autosleep_ws); - if (state > PM_SUSPEND_ON) + if (state > PM_SUSPEND_ON) { + pm_wakep_autosleep_enabled(true); 
queue_up_suspend_work(); + } else { + pm_wakep_autosleep_enabled(false); + } mutex_unlock(&autosleep_lock); return 0; -- cgit v1.2.3 From b86ff9820fd5df69295273b9aa68e58786ffc23f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:53:42 +0200 Subject: PM / Sleep: Add user space interface for manipulating wakeup sources, v3 Android allows user space to manipulate wakelocks using two sysfs files located in /sys/power/, wake_lock and wake_unlock. Writing a wakelock name and optionally a timeout to the wake_lock file causes the wakelock whose name was written to be acquired (it is created first if necessary), optionally with the given timeout. Writing the name of a wakelock to wake_unlock causes that wakelock to be released. Implement an analogous interface for user space using wakeup sources. Add the /sys/power/wake_lock and /sys/power/wake_unlock files allowing user space to create, activate and deactivate wakeup sources, such that writing a name and optionally a timeout to wake_lock causes the wakeup source of that name to be activated, optionally with the given timeout. If that wakeup source doesn't exist, it will be created and then activated. Writing a name to wake_unlock causes the wakeup source of that name, if there is one, to be deactivated. Wakeup sources created with the help of wake_lock that haven't been used for more than 5 minutes are garbage collected and destroyed. Moreover, there can be only WL_NUMBER_LIMIT wakeup sources created with the help of wake_lock present at a time. The data type used to track wakeup sources created by user space is called "struct wakelock" to indicate the origins of this feature. This version of the patch includes an rbtree manipulation fix from John Stultz. Signed-off-by: Rafael J.
Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: NeilBrown --- Documentation/ABI/testing/sysfs-power | 42 +++++++ drivers/base/power/wakeup.c | 1 + kernel/power/Kconfig | 8 ++ kernel/power/Makefile | 1 + kernel/power/main.c | 41 +++++++ kernel/power/power.h | 9 ++ kernel/power/wakelock.c | 215 ++++++++++++++++++++++++++++++++++ 7 files changed, 317 insertions(+) create mode 100644 kernel/power/wakelock.c diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 237c735db6c9..31725ffeeb3a 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -189,3 +189,45 @@ Description: Reading from this file causes the last string successfully written to it to be returned. + +What: /sys/power/wake_lock +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/power/wake_lock file allows user space to create + wakeup source objects and activate them on demand (if one of + those wakeup sources is active, reads from the + /sys/power/wakeup_count file block or return false). When a + string without white space is written to /sys/power/wake_lock, + it will be assumed to represent a wakeup source name. If there + is a wakeup source object with that name, it will be activated + (unless active already). Otherwise, a new wakeup source object + will be registered, assigned the given name and activated. + If a string written to /sys/power/wake_lock contains white + space, the part of the string preceding the white space will be + regarded as a wakeup source name and handled as described above. + The other part of the string will be regarded as a timeout (in + nanoseconds) such that the wakeup source will be automatically + deactivated after it has expired. The timeout, if present, is + set regardless of the current state of the wakeup source object + in question.
+ + Reads from this file return a string consisting of the names of + wakeup sources created with the help of it that are active at + the moment, separated with spaces. + + +What: /sys/power/wake_unlock +Date: February 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/power/wake_unlock file allows user space to deactivate + wakeup sources created with the help of /sys/power/wake_lock. + When a string is written to /sys/power/wake_unlock, it will be + assumed to represent the name of a wakeup source to deactivate. + If a wakeup source object of that name exists and is active at + the moment, it will be deactivated. + + Reads from this file return a string consisting of the names of + wakeup sources created with the help of /sys/power/wake_lock + that are inactive at the moment, separated with spaces. diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2595b8d8fe1f..cbb463b3a750 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -133,6 +133,7 @@ void wakeup_source_add(struct wakeup_source *ws) spin_lock_init(&ws->lock); setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws); ws->active = false; + ws->last_time = ktime_get(); spin_lock_irq(&events_lock); list_add_rcu(&ws->entry, &wakeup_sources); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 67947083f842..1d534076d33a 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -111,6 +111,14 @@ config PM_AUTOSLEEP Allow the kernel to trigger a system transition into a global sleep state automatically whenever there are no active wakeup sources. +config PM_WAKELOCKS + bool "User space wakeup sources interface" + depends on PM_SLEEP + default n + ---help--- + Allow user space to create, activate and deactivate wakeup source + objects with the help of a sysfs-based interface. 
+ config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 010b2f7e148c..29472bff11ef 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -10,5 +10,6 @@ obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o +obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/main.c b/kernel/power/main.c index ba6a5645952d..54ec071de337 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -431,6 +431,43 @@ static ssize_t autosleep_store(struct kobject *kobj, power_attr(autosleep); #endif /* CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS +static ssize_t wake_lock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, true); +} + +static ssize_t wake_lock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_lock(buf); + return error ? error : n; +} + +power_attr(wake_lock); + +static ssize_t wake_unlock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, false); +} + +static ssize_t wake_unlock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_unlock(buf); + return error ? 
error : n; +} + +power_attr(wake_unlock); + +#endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -487,6 +524,10 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_AUTOSLEEP &autosleep_attr.attr, #endif +#ifdef CONFIG_PM_WAKELOCKS + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif diff --git a/kernel/power/power.h b/kernel/power/power.h index 4cf80fa115d9..b0bd4beaebfe 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -282,3 +282,12 @@ static inline void pm_autosleep_unlock(void) {} static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } #endif /* !CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS + +/* kernel/power/wakelock.c */ +extern ssize_t pm_show_wakelocks(char *buf, bool show_active); +extern int pm_wake_lock(const char *buf); +extern int pm_wake_unlock(const char *buf); + +#endif /* !CONFIG_PM_WAKELOCKS */ diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 000000000000..579700665e8c --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,215 @@ +/* + * kernel/power/wakelock.c + * + * User space wakeup sources support. + * + * Copyright (C) 2012 Rafael J. Wysocki + * + * This code is based on the analogous interface allowing user space to + * manipulate wakelocks on Android. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define WL_NUMBER_LIMIT 100 +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static DEFINE_MUTEX(wakelocks_lock); + +struct wakelock { + char *name; + struct rb_node node; + struct wakeup_source ws; + struct list_head lru; +}; + +static struct rb_root wakelocks_tree = RB_ROOT; +static LIST_HEAD(wakelocks_lru_list); +static unsigned int number_of_wakelocks; +static unsigned int wakelocks_gc_count; + +ssize_t pm_show_wakelocks(char *buf, bool show_active) +{ + struct rb_node *node; + struct wakelock *wl; + char *str = buf; + char *end = buf + PAGE_SIZE; + + mutex_lock(&wakelocks_lock); + + for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { + wl = rb_entry(node, struct wakelock, node); + if (wl->ws.active == show_active) + str += scnprintf(str, end - str, "%s ", wl->name); + } + if (str > buf) + str--; + + str += scnprintf(str, end - str, "\n"); + + mutex_unlock(&wakelocks_lock); + return (str - buf); +} + +static struct wakelock *wakelock_lookup_add(const char *name, size_t len, + bool add_if_not_found) +{ + struct rb_node **node = &wakelocks_tree.rb_node; + struct rb_node *parent = *node; + struct wakelock *wl; + + while (*node) { + int diff; + + parent = *node; + wl = rb_entry(*node, struct wakelock, node); + diff = strncmp(name, wl->name, len); + if (diff == 0) { + if (wl->name[len]) + diff = -1; + else + return wl; + } + if (diff < 0) + node = &(*node)->rb_left; + else + node = &(*node)->rb_right; + } + if (!add_if_not_found) + return ERR_PTR(-EINVAL); + + if (number_of_wakelocks > WL_NUMBER_LIMIT) + return ERR_PTR(-ENOSPC); + + /* Not found, we have to add a new one. 
*/ + wl = kzalloc(sizeof(*wl), GFP_KERNEL); + if (!wl) + return ERR_PTR(-ENOMEM); + + wl->name = kstrndup(name, len, GFP_KERNEL); + if (!wl->name) { + kfree(wl); + return ERR_PTR(-ENOMEM); + } + wl->ws.name = wl->name; + wakeup_source_add(&wl->ws); + rb_link_node(&wl->node, parent, node); + rb_insert_color(&wl->node, &wakelocks_tree); + list_add(&wl->lru, &wakelocks_lru_list); + number_of_wakelocks++; + return wl; +} + +int pm_wake_lock(const char *buf) +{ + const char *str = buf; + struct wakelock *wl; + u64 timeout_ns = 0; + size_t len; + int ret = 0; + + while (*str && !isspace(*str)) + str++; + + len = str - buf; + if (!len) + return -EINVAL; + + if (*str && *str != '\n') { + /* Find out if there's a valid timeout string appended. */ + ret = kstrtou64(skip_spaces(str), 10, &timeout_ns); + if (ret) + return -EINVAL; + } + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, true); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + if (timeout_ns) { + u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1; + + do_div(timeout_ms, NSEC_PER_MSEC); + __pm_wakeup_event(&wl->ws, timeout_ms); + } else { + __pm_stay_awake(&wl->ws); + } + + list_move(&wl->lru, &wakelocks_lru_list); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now = ktime_get(); + + list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + number_of_wakelocks--; + } + } + wakelocks_gc_count = 0; +} + +int pm_wake_unlock(const char *buf) +{ + struct wakelock *wl; + size_t len; + int ret = 
0; + + len = strlen(buf); + if (!len) + return -EINVAL; + + if (buf[len-1] == '\n') + len--; + + if (!len) + return -EINVAL; + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, false); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + __pm_relax(&wl->ws); + list_move(&wl->lru, &wakelocks_lru_list); + if (++wakelocks_gc_count > WL_GC_COUNT_MAX) + wakelocks_gc(); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} -- cgit v1.2.3 From a5bef810ad9816a3a8e500d8832be77d52903a12 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:54:17 +0200 Subject: PM / Domains: Rework default device stop governor function, v2 The existing default device stop governor function for PM domains, default_stop_ok(), is supposed to check whether or not the device's PM QoS latency constraint will be violated if the device is stopped by pm_genpd_runtime_suspend(). However, the computations carried out by it don't reflect the definition of the PM QoS latency constraint in Documentation/ABI/testing/sysfs-devices-power. Make default_stop_ok() follow the definition of the PM QoS latency constraint. In particular, make it take the device's start and stop latencies into account correctly. Add a new field, effective_constraint_ns, to struct gpd_timing_data and use it to store the difference between the device's PM QoS constraint and its resume latency for use by the device's parent (the effective_constraint_ns values for the children are used for computing the parent's one along with its PM QoS constraint). Remove the break_even_ns field from struct gpd_timing_data, because it's not used any more. Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/domain.c | 1 + drivers/base/power/domain_governor.c | 53 +++++++++++++++++++++++++++++++++--- include/linux/pm_domain.h | 2 +- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 73ce9fbe9839..3c6e94fe058a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -506,6 +506,7 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (dev_gpd_data(dev)->always_on) return -EBUSY; + dev_gpd_data(dev)->td.effective_constraint_ns = -1; stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; if (stop_ok && !stop_ok(dev)) return -EBUSY; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 66a265bf5867..a67f157a7a74 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -14,6 +14,31 @@ #ifdef CONFIG_PM_RUNTIME +static int dev_update_qos_constraint(struct device *dev, void *data) +{ + s64 *constraint_ns_p = data; + s32 constraint_ns = -1; + + if (dev->power.subsys_data && dev->power.subsys_data->domain_data) + constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; + + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + return 0; + + /* + * constraint_ns cannot be negative here, because the device has been + * suspended. + */ + if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0) + *constraint_ns_p = constraint_ns; + + return 0; +} + /** * default_stop_ok - Default PM domain governor routine for stopping devices. * @dev: Device to check. 
@@ -21,14 +46,34 @@ bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + s64 constraint_ns; dev_dbg(dev, "%s()\n", __func__); - if (dev->power.max_time_suspended_ns < 0 || td->break_even_ns == 0) - return true; + constraint_ns = dev_pm_qos_read_value(dev); + if (constraint_ns < 0) + return false; + + constraint_ns *= NSEC_PER_USEC; + /* + * We can walk the children without any additional locking, because + * they all have been suspended at this point. + */ + if (!dev->power.ignore_children) + device_for_each_child(dev, &constraint_ns, + dev_update_qos_constraint); - return td->stop_latency_ns + td->start_latency_ns < td->break_even_ns - && td->break_even_ns < dev->power.max_time_suspended_ns; + if (constraint_ns > 0) { + constraint_ns -= td->start_latency_ns; + if (constraint_ns == 0) + return false; + } + td->effective_constraint_ns = constraint_ns; + /* + * The children have been suspended already, so we don't need to take + * their stop latencies into account here. + */ + return constraint_ns > td->stop_latency_ns || constraint_ns == 0; } /** diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 91f8286106ea..9c25219458c2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -93,7 +93,7 @@ struct gpd_timing_data { s64 start_latency_ns; s64 save_state_latency_ns; s64 restore_state_latency_ns; - s64 break_even_ns; + s64 effective_constraint_ns; }; struct generic_pm_domain_data { -- cgit v1.2.3 From dd8683e97f12609fb3f8c4318628f0d246542f89 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:54:30 +0200 Subject: PM / Domains: Rework default domain power off governor function, v2 The existing default domain power down governor function for PM domains, default_power_down_ok(), is supposed to check whether or not the PM QoS latency constraints of the devices in the domain will be violated if the domain is turned off by pm_genpd_poweroff(). 
However, the computations carried out by it don't reflect the definition of the PM QoS latency constraint in Documentation/ABI/testing/sysfs-devices-power. Make default_power_down_ok() follow the definition of the PM QoS latency constraint. In particular, make it only take latencies into account, because it doesn't matter how much time has elapsed since the domain's devices were suspended for the computation. Remove the break_even_ns and power_off_time fields from struct generic_pm_domain, because they are not necessary any more. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 2 +- drivers/base/power/domain_governor.c | 69 ++++++++++++++++++------------------ include/linux/pm_domain.h | 2 -- 3 files changed, 35 insertions(+), 38 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3c6e94fe058a..d03a8c7ad847 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -381,6 +381,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) return 0; } + genpd->max_off_time_ns = -1; if (genpd->gov && genpd->gov->power_down_ok) { if (!genpd->gov->power_down_ok(&genpd->domain)) return -EAGAIN; @@ -443,7 +444,6 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } genpd->status = GPD_STATE_POWER_OFF; - genpd->power_off_time = ktime_get(); /* Update PM QoS information for devices in the domain. 
*/ list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index a67f157a7a74..2aae623fd840 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -89,7 +89,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct pm_domain_data *pdd; s64 min_dev_off_time_ns; s64 off_on_time_ns; - ktime_t time_now = ktime_get(); off_on_time_ns = genpd->power_off_latency_ns + genpd->power_on_latency_ns; @@ -118,8 +117,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) if (sd_max_off_ns < 0) continue; - sd_max_off_ns -= ktime_to_ns(ktime_sub(time_now, - sd->power_off_time)); /* * Check if the subdomain is allowed to be off long enough for * the current domain to turn off and on (that's how much time @@ -135,52 +132,54 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) min_dev_off_time_ns = -1; list_for_each_entry(pdd, &genpd->dev_list, list_node) { struct gpd_timing_data *td; - struct device *dev = pdd->dev; - s64 dev_off_time_ns; + s64 constraint_ns; - if (!dev->driver || dev->power.max_time_suspended_ns < 0) + if (!pdd->dev->driver) continue; + /* + * Check if the device is allowed to be off long enough for the + * domain to turn off and on (that's how much time it will + * have to wait worst case). + */ td = &to_gpd_data(pdd)->td; - dev_off_time_ns = dev->power.max_time_suspended_ns - - (td->start_latency_ns + td->restore_state_latency_ns + - ktime_to_ns(ktime_sub(time_now, - dev->power.suspend_time))); - if (dev_off_time_ns <= off_on_time_ns) - return false; - - if (min_dev_off_time_ns > dev_off_time_ns - || min_dev_off_time_ns < 0) - min_dev_off_time_ns = dev_off_time_ns; - } + constraint_ns = td->effective_constraint_ns; + /* default_stop_ok() need not be called before us. 
*/ + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(pdd->dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + continue; - if (min_dev_off_time_ns < 0) { /* - * There are no latency constraints, so the domain can spend - * arbitrary time in the "off" state. + * constraint_ns cannot be negative here, because the device has + * been suspended. */ - genpd->max_off_time_ns = -1; - return true; + constraint_ns -= td->restore_state_latency_ns; + if (constraint_ns <= off_on_time_ns) + return false; + + if (min_dev_off_time_ns > constraint_ns + || min_dev_off_time_ns < 0) + min_dev_off_time_ns = constraint_ns; } /* - * The difference between the computed minimum delta and the time needed - * to turn the domain on is the maximum theoretical time this domain can - * spend in the "off" state. + * If the computed minimum device off time is negative, there are no + * latency constraints, so the domain can spend arbitrary time in the + * "off" state. */ - min_dev_off_time_ns -= genpd->power_on_latency_ns; + if (min_dev_off_time_ns < 0) + return true; /* - * If the difference between the computed minimum delta and the time - * needed to turn the domain off and back on on is smaller than the - * domain's power break even time, removing power from the domain is not - * worth it. + * The difference between the computed minimum device off time and the + * time needed to turn the domain on is the maximum theoretical time + * this domain can spend in the "off" state. 
*/ - if (genpd->break_even_ns > - min_dev_off_time_ns - genpd->power_off_latency_ns) - return false; - - genpd->max_off_time_ns = min_dev_off_time_ns; + genpd->max_off_time_ns = min_dev_off_time_ns - + genpd->power_on_latency_ns; return true; } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9c25219458c2..e7ada5ccdfc2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -70,9 +70,7 @@ struct generic_pm_domain { int (*power_on)(struct generic_pm_domain *domain); s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; - s64 break_even_ns; /* Power break even for the entire domain. */ s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ - ktime_t power_off_time; struct device_node *of_node; /* Node in device tree */ }; -- cgit v1.2.3 From 76e267d822f2913893ad210ba431607aa8e2af94 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:54:36 +0200 Subject: PM / Runtime: Remove device fields related to suspend time, v2 After the previous changes in default_stop_ok() and default_power_down_ok() for PM domains, there are two fields in struct dev_pm_info that aren't necessary any more, suspend_time and max_time_suspended_ns. Remove those fields along with all of the code that accesses them, which simplifies the runtime PM framework quite a bit. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 13 ------ drivers/base/power/runtime.c | 103 +------------------------------------------ include/linux/pm.h | 2 - include/linux/pm_runtime.h | 3 -- 4 files changed, 2 insertions(+), 119 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index d03a8c7ad847..45c2b7f0fe3b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -445,16 +445,6 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) genpd->status = GPD_STATE_POWER_OFF; - /* Update PM QoS information for devices in the domain. 
*/ - list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { - struct gpd_timing_data *td = &to_gpd_data(pdd)->td; - - pm_runtime_update_max_time_suspended(pdd->dev, - td->start_latency_ns + - td->restore_state_latency_ns + - genpd->power_on_latency_ns); - } - list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); genpd_queue_power_off_work(link->master); @@ -515,9 +505,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (ret) return ret; - pm_runtime_update_max_time_suspended(dev, - dev_gpd_data(dev)->td.start_latency_ns); - /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bd0f3949bcf9..59894873a3b3 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -282,47 +282,6 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) return retval != -EACCES ? retval : -EIO; } -struct rpm_qos_data { - ktime_t time_now; - s64 constraint_ns; -}; - -/** - * rpm_update_qos_constraint - Update a given PM QoS constraint data. - * @dev: Device whose timing data to use. - * @data: PM QoS constraint data to update. - * - * Use the suspend timing data of @dev to update PM QoS constraint data pointed - * to by @data. 
- */ -static int rpm_update_qos_constraint(struct device *dev, void *data) -{ - struct rpm_qos_data *qos = data; - unsigned long flags; - s64 delta_ns; - int ret = 0; - - spin_lock_irqsave(&dev->power.lock, flags); - - if (dev->power.max_time_suspended_ns < 0) - goto out; - - delta_ns = dev->power.max_time_suspended_ns - - ktime_to_ns(ktime_sub(qos->time_now, dev->power.suspend_time)); - if (delta_ns <= 0) { - ret = -EBUSY; - goto out; - } - - if (qos->constraint_ns > delta_ns || qos->constraint_ns == 0) - qos->constraint_ns = delta_ns; - - out: - spin_unlock_irqrestore(&dev->power.lock, flags); - - return ret; -} - /** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. @@ -349,7 +308,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) { int (*callback)(struct device *); struct device *parent = NULL; - struct rpm_qos_data qos; int retval; trace_rpm_suspend(dev, rpmflags); @@ -445,38 +403,14 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } - qos.constraint_ns = __dev_pm_qos_read_value(dev); - if (qos.constraint_ns < 0) { - /* Negative constraint means "never suspend". */ + if (__dev_pm_qos_read_value(dev) < 0) { + /* Negative PM QoS constraint means "never suspend". */ retval = -EPERM; goto out; } - qos.constraint_ns *= NSEC_PER_USEC; - qos.time_now = ktime_get(); __update_runtime_status(dev, RPM_SUSPENDING); - if (!dev->power.ignore_children) { - if (dev->power.irq_safe) - spin_unlock(&dev->power.lock); - else - spin_unlock_irq(&dev->power.lock); - - retval = device_for_each_child(dev, &qos, - rpm_update_qos_constraint); - - if (dev->power.irq_safe) - spin_lock(&dev->power.lock); - else - spin_lock_irq(&dev->power.lock); - - if (retval) - goto fail; - } - - dev->power.suspend_time = qos.time_now; - dev->power.max_time_suspended_ns = qos.constraint_ns ? 
: -1; - if (dev->pm_domain) callback = dev->pm_domain->ops.runtime_suspend; else if (dev->type && dev->type->pm) @@ -529,8 +463,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) fail: __update_runtime_status(dev, RPM_ACTIVE); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); @@ -704,9 +636,6 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.no_callbacks) goto no_callback; /* Assume success. */ - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - __update_runtime_status(dev, RPM_RESUMING); if (dev->pm_domain) @@ -1369,9 +1298,6 @@ void pm_runtime_init(struct device *dev) setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, (unsigned long)dev); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - init_waitqueue_head(&dev->power.wait_queue); } @@ -1389,28 +1315,3 @@ void pm_runtime_remove(struct device *dev) if (dev->power.irq_safe && dev->parent) pm_runtime_put_sync(dev->parent); } - -/** - * pm_runtime_update_max_time_suspended - Update device's suspend time data. - * @dev: Device to handle. - * @delta_ns: Value to subtract from the device's max_time_suspended_ns field. - * - * Update the device's power.max_time_suspended_ns field by subtracting - * @delta_ns from it. The resulting value of power.max_time_suspended_ns is - * never negative. 
- */ -void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns) -{ - unsigned long flags; - - spin_lock_irqsave(&dev->power.lock, flags); - - if (delta_ns > 0 && dev->power.max_time_suspended_ns > 0) { - if (dev->power.max_time_suspended_ns > delta_ns) - dev->power.max_time_suspended_ns -= delta_ns; - else - dev->power.max_time_suspended_ns = 0; - } - - spin_unlock_irqrestore(&dev->power.lock, flags); -} diff --git a/include/linux/pm.h b/include/linux/pm.h index 715305e05123..f067e60a3832 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -544,8 +544,6 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; - ktime_t suspend_time; - s64 max_time_suspended_ns; struct dev_pm_qos_request *pq_req; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 609daae7a014..f271860c78d5 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -150,9 +150,6 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } -static inline void pm_runtime_update_max_time_suspended(struct device *dev, - s64 delta_ns) {} - #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) -- cgit v1.2.3 From 23e0fc5ae64925e0ff1b6221b83dff1b217545df Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 29 Apr 2012 22:54:47 +0200 Subject: PM / QoS: Create device constraints objects on notifier registration The current behavior of dev_pm_qos_add_notifier() makes device PM QoS notifiers less than useful. 
Namely, it silently returns success when called before any PM QoS constraints are added for the device, so the caller will assume that the notifier has been registered, but when someone actually adds some nontrivial constraints for the device eventually, the previous callers of dev_pm_qos_add_notifier() will not know about that and their notifier routines will not be executed (contrary to their expectations). To address this problem make dev_pm_qos_add_notifier() create the constraints object for the device if it is not present when the routine is called. Signed-off-by: Rafael J. Wysocki Acked-by : markgross --- drivers/base/power/qos.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 71855570922d..fd849a2c4fa8 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -352,21 +352,26 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); * * Will register the notifier into a notification chain that gets called * upon changes to the target value for the device. + * + * If the device's constraints object doesn't exist when this routine is called, + * it will be created (or error code will be returned if that fails). */ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) { - int retval = 0; + int ret = 0; mutex_lock(&dev_pm_qos_mtx); - /* Silently return if the constraints object is not present. */ - if (dev->power.constraints) - retval = blocking_notifier_chain_register( - dev->power.constraints->notifiers, - notifier); + if (!dev->power.constraints) + ret = dev->power.power_state.event != PM_EVENT_INVALID ? 
+ dev_pm_qos_constraints_allocate(dev) : -ENODEV; + + if (!ret) + ret = blocking_notifier_chain_register( + dev->power.constraints->notifiers, notifier); mutex_unlock(&dev_pm_qos_mtx); - return retval; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier); -- cgit v1.2.3 From 4d7e30d98939a0340022ccd49325a3d70f7e0238 Mon Sep 17 00:00:00 2001 From: Arve Hjønnevåg Date: Tue, 1 May 2012 21:33:34 +0200 Subject: epoll: Add a flag, EPOLLWAKEUP, to prevent suspend while epoll events are ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an epoll_event that has the EPOLLWAKEUP flag set is ready, a wakeup_source will be active to prevent suspend. This can be used to handle wakeup events from a driver that supports poll, e.g. input, if that driver wakes up the waitqueue passed to epoll before allowing suspend. Signed-off-by: Arve Hjønnevåg Reviewed-by: NeilBrown Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 90 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/capability.h | 5 ++- include/linux/eventpoll.h | 12 +++++++ 3 files changed, 103 insertions(+), 4 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c0b3c70ee87a..2cf0f2153be5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,7 @@ */ /* Epoll private bits inside the event mask */ -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) +#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 @@ -154,6 +155,9 @@ struct epitem { /* List header used to link this item to the "struct file" items list */ struct list_head fllink; + /* wakeup_source used when EPOLLWAKEUP is set */ + struct wakeup_source *ws; + /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; @@ -194,6 +198,9 @@ struct eventpoll { */ struct epitem 
*ovflist; + /* wakeup_source used when ep_scan_ready_list is running */ + struct wakeup_source *ws; + /* The user that created the eventpoll descriptor */ struct user_struct *user; @@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep, * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after @@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * Quickly re-inject items left on "txlist". */ list_splice(&txlist, &ep->rdllist); + __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { /* @@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); free_uid(ep->user); + wakeup_source_unregister(ep->ws); kfree(ep); } @@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ + __pm_relax(epi->ws); list_del_init(&epi->rdllink); } } @@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; + if (epi->ws) { + /* + * Activate ep->ws since epi->ws may get + * deactivated at any time. 
+ */ + __pm_stay_awake(ep->ws); + } + } goto out_unlock; } /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1091,6 +1115,30 @@ static int reverse_path_check(void) return error; } +static int ep_create_wakeup_source(struct epitem *epi) +{ + const char *name; + + if (!epi->ep->ws) { + epi->ep->ws = wakeup_source_register("eventpoll"); + if (!epi->ep->ws) + return -ENOMEM; + } + + name = epi->ffd.file->f_path.dentry->d_name.name; + epi->ws = wakeup_source_register(name); + if (!epi->ws) + return -ENOMEM; + + return 0; +} + +static void ep_destroy_wakeup_source(struct epitem *epi) +{ + wakeup_source_unregister(epi->ws); + epi->ws = NULL; +} + /* * Must be called with "mtx" held. */ @@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, epi->event = *event; epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; + if (epi->event.events & EPOLLWAKEUP) { + error = ep_create_wakeup_source(epi); + if (error) + goto error_create_wakeup_source; + } else { + epi->ws = NULL; + } /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1204,6 +1260,9 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + +error_create_wakeup_source: kmem_cache_free(epi_cache, epi); return error; @@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, 
struct epitem *epi, struct epoll_even epi->event.events = event->events; pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ + if (epi->event.events & EPOLLWAKEUP) { + if (!epi->ws) + ep_create_wakeup_source(epi); + } else if (epi->ws) { + ep_destroy_wakeup_source(epi); + } /* * Get current event bits. We can safely use the file* here because @@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, !list_empty(head) && eventcnt < esed->maxevents;) { epi = list_first_entry(head, struct epitem, rdllink); + /* + * Activate ep->ws before deactivating epi->ws to prevent + * triggering auto-suspend here (in case we reactive epi->ws + * below). + * + * This could be rearranged to delay the deactivation of epi->ws + * instead, but then epi->ws would temporarily be out of sync + * with ep_is_linked(). + */ + if (epi->ws && epi->ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(epi->ws); list_del_init(&epi->rdllink); pt._key = epi->event.events; @@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); + __pm_stay_awake(epi->ws); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist. 
*/ list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); } } } @@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, if (!tfile->f_op || !tfile->f_op->poll) goto error_tgt_fput; + /* Check if EPOLLWAKEUP is allowed */ + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + goto error_tgt_fput; + /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit diff --git a/include/linux/capability.h b/include/linux/capability.h index 12d52dedb229..c398cff3dab7 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,8 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 +/* Allow preventing system suspends while epoll events are pending */ -#define CAP_LAST_CAP CAP_WAKE_ALARM +#define CAP_EPOLLWAKEUP 36 + +#define CAP_LAST_CAP CAP_EPOLLWAKEUP #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 657ab55beda0..6f8be328770a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -26,6 +26,18 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* + * Request the handling of system wakeup events so as to prevent system suspends + * from happening while those events are being processed. + * + * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be + * re-allowed until epoll_wait is called again after consuming the wakeup + * event(s). 
+ * + * Requires CAP_EPOLLWAKEUP + */ +#define EPOLLWAKEUP (1 << 29) + /* Set the One Shot behaviour for the target file descriptor */ #define EPOLLONESHOT (1 << 30) -- cgit v1.2.3 From 040e5bf65e1ee66266bc314c5965518a7c21ff36 Mon Sep 17 00:00:00 2001 From: Arve Hjønnevåg Date: Fri, 4 May 2012 00:14:21 +0200 Subject: PM / Sleep: Fix a mistake in a conditional in autosleep_store() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The condition check in autosleep_store() is incorrect and prevents /sys/power/autosleep from working as advertised. Fix that. [rjw: Added the changelog.] Signed-off-by: Arve Hjønnevåg Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 54ec071de337..428f8a034e96 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -422,7 +422,7 @@ static ssize_t autosleep_store(struct kobject *kobj, int error; if (state == PM_SUSPEND_ON - && !(strncmp(buf, "off", 3) && strncmp(buf, "off\n", 4))) + && strcmp(buf, "off") && strcmp(buf, "off\n")) return -EINVAL; error = pm_autosleep_set_state(state); -- cgit v1.2.3 From efa6902501ffc87d69bfb10b8a09b7d6ee222d77 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 May 2012 21:33:53 +0200 Subject: PM / Domains: Make device removal more straightforward The removal of a device from a PM domain doesn't have to browse the domain's device list, because it can check directly if the device belongs to the given domain. Moreover, it should clear the domain_data pointer in dev->power.subsys_data, because dev_pm_put_subsys_data(dev) may not remove dev->power.subsys_data and the stale domain data pointer may cause problems to happen. Rework pm_genpd_remove_device() taking the above observations into account. Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 45c2b7f0fe3b..6ae5672c35ab 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1279,11 +1279,13 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { struct pm_domain_data *pdd; - int ret = -EINVAL; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) + if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev) + || IS_ERR_OR_NULL(dev->pm_domain) + || pd_to_genpd(dev->pm_domain) != genpd) return -EINVAL; genpd_acquire_lock(genpd); @@ -1293,21 +1295,14 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - if (pdd->dev != dev) - continue; - - list_del_init(&pdd->list_node); - pdd->dev = NULL; - dev_pm_put_subsys_data(dev); - dev->pm_domain = NULL; - kfree(to_gpd_data(pdd)); - - genpd->device_count--; + dev->pm_domain = NULL; + pdd = dev->power.subsys_data->domain_data; + list_del_init(&pdd->list_node); + dev->power.subsys_data->domain_data = NULL; + dev_pm_put_subsys_data(dev); + kfree(to_gpd_data(pdd)); - ret = 0; - break; - } + genpd->device_count--; out: genpd_release_lock(genpd); -- cgit v1.2.3 From 6ff7bb0d02f82968be13937c03e93b6c090229df Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 May 2012 21:34:07 +0200 Subject: PM / Domains: Cache device stop and domain power off governor results, v3 The results of the default device stop and domain power off governor functions for generic PM domains, default_stop_ok() and default_power_down_ok(), depend only on the timing data of devices, which are static, and on their PM QoS constraints. 
Thus, in theory, these functions only need to carry out their computations, which may be time consuming in general, when it is known that the PM QoS constraint of at least one of the devices in question has changed. Use the PM QoS notifiers of devices to implement that. First, introduce new fields, constraint_changed and max_off_time_changed, into struct gpd_timing_data and struct generic_pm_domain, respectively, and register a PM QoS notifier function when adding a device into a domain that will set those fields to 'true' whenever the device's PM QoS constraint is modified. Second, make default_stop_ok() and default_power_down_ok() use those fields to decide whether or not to carry out their computations from scratch. The device and PM domain hierarchies are taken into account in that and the expense is that the changes of PM QoS constraints of suspended devices will not be taken into account immediately, which isn't guaranteed anyway in general. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 120 ++++++++++++++++++++++++++++++----- drivers/base/power/domain_governor.c | 45 ++++++++++++- include/linux/pm_domain.h | 7 ++ 3 files changed, 153 insertions(+), 19 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 6ae5672c35ab..cde5983de6c2 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -38,11 +39,13 @@ ktime_t __start = ktime_get(); \ type __retval = GENPD_DEV_CALLBACK(genpd, type, callback, dev); \ s64 __elapsed = ktime_to_ns(ktime_sub(ktime_get(), __start)); \ - struct generic_pm_domain_data *__gpd_data = dev_gpd_data(dev); \ - if (__elapsed > __gpd_data->td.field) { \ - __gpd_data->td.field = __elapsed; \ + struct gpd_timing_data *__td = &dev_gpd_data(dev)->td; \ + if (!__retval && __elapsed > __td->field) { \ + __td->field = __elapsed; \ dev_warn(dev, name " latency exceeded, new value %lld ns\n", 
\ __elapsed); \ + genpd->max_off_time_changed = true; \ + __td->constraint_changed = true; \ } \ __retval; \ }) @@ -211,6 +214,7 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_on_latency_ns) { genpd->power_on_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; if (genpd->name) pr_warning("%s: Power-on latency exceeded, " "new value %lld ns\n", genpd->name, @@ -247,6 +251,53 @@ int pm_genpd_poweron(struct generic_pm_domain *genpd) #ifdef CONFIG_PM_RUNTIME +static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + struct generic_pm_domain_data *gpd_data; + struct device *dev; + + gpd_data = container_of(nb, struct generic_pm_domain_data, nb); + + mutex_lock(&gpd_data->lock); + dev = gpd_data->base.dev; + if (!dev) { + mutex_unlock(&gpd_data->lock); + return NOTIFY_DONE; + } + mutex_unlock(&gpd_data->lock); + + for (;;) { + struct generic_pm_domain *genpd; + struct pm_domain_data *pdd; + + spin_lock_irq(&dev->power.lock); + + pdd = dev->power.subsys_data ? + dev->power.subsys_data->domain_data : NULL; + if (pdd) { + to_gpd_data(pdd)->td.constraint_changed = true; + genpd = dev_to_genpd(dev); + } else { + genpd = ERR_PTR(-ENODATA); + } + + spin_unlock_irq(&dev->power.lock); + + if (!IS_ERR(genpd)) { + mutex_lock(&genpd->lock); + genpd->max_off_time_changed = true; + mutex_unlock(&genpd->lock); + } + + dev = dev->parent; + if (!dev || dev->power.ignore_children) + break; + } + + return NOTIFY_DONE; +} + /** * __pm_genpd_save_device - Save the pre-suspend state of a device. * @pdd: Domain data of the device to save the state of. 
@@ -381,7 +432,6 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) return 0; } - genpd->max_off_time_ns = -1; if (genpd->gov && genpd->gov->power_down_ok) { if (!genpd->gov->power_down_ok(&genpd->domain)) return -EAGAIN; @@ -436,6 +486,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_off_latency_ns) { genpd->power_off_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; if (genpd->name) pr_warning("%s: Power-off latency exceeded, " "new value %lld ns\n", genpd->name, @@ -496,7 +547,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (dev_gpd_data(dev)->always_on) return -EBUSY; - dev_gpd_data(dev)->td.effective_constraint_ns = -1; stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; if (stop_ok && !stop_ok(dev)) return -EBUSY; @@ -601,6 +651,12 @@ void pm_genpd_poweroff_unused(void) #else +static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + return NOTIFY_DONE; +} + static inline void genpd_power_off_work_fn(struct work_struct *work) {} #define pm_genpd_runtime_suspend NULL @@ -1197,6 +1253,14 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; + gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); + if (!gpd_data) + return -ENOMEM; + + mutex_init(&gpd_data->lock); + gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; + dev_pm_qos_add_notifier(dev, &gpd_data->nb); + genpd_acquire_lock(genpd); if (genpd->status == GPD_STATE_POWER_OFF) { @@ -1215,26 +1279,35 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, goto out; } - gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); - if (!gpd_data) { - ret = -ENOMEM; - goto out; - } - genpd->device_count++; + genpd->max_off_time_changed = true; - dev->pm_domain = &genpd->domain; 
dev_pm_get_subsys_data(dev); + + mutex_lock(&gpd_data->lock); + spin_lock_irq(&dev->power.lock); + dev->pm_domain = &genpd->domain; dev->power.subsys_data->domain_data = &gpd_data->base; gpd_data->base.dev = dev; - gpd_data->need_restore = false; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); + gpd_data->need_restore = false; if (td) gpd_data->td = *td; + gpd_data->td.constraint_changed = true; + gpd_data->td.effective_constraint_ns = -1; + spin_unlock_irq(&dev->power.lock); + mutex_unlock(&gpd_data->lock); + + genpd_release_lock(genpd); + + return 0; + out: genpd_release_lock(genpd); + dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + kfree(gpd_data); return ret; } @@ -1278,6 +1351,7 @@ int __pm_genpd_of_add_device(struct device_node *genpd_node, struct device *dev, int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { + struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; int ret = 0; @@ -1295,14 +1369,27 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, goto out; } + genpd->device_count--; + genpd->max_off_time_changed = true; + + spin_lock_irq(&dev->power.lock); dev->pm_domain = NULL; pdd = dev->power.subsys_data->domain_data; list_del_init(&pdd->list_node); dev->power.subsys_data->domain_data = NULL; - dev_pm_put_subsys_data(dev); - kfree(to_gpd_data(pdd)); + spin_unlock_irq(&dev->power.lock); - genpd->device_count--; + gpd_data = to_gpd_data(pdd); + mutex_lock(&gpd_data->lock); + pdd->dev = NULL; + mutex_unlock(&gpd_data->lock); + + genpd_release_lock(genpd); + + dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + kfree(gpd_data); + dev_pm_put_subsys_data(dev); + return 0; out: genpd_release_lock(genpd); @@ -1673,6 +1760,7 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->resume_count = 0; genpd->device_count = 0; genpd->max_off_time_ns = -1; + genpd->max_off_time_changed = true; genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend; genpd->domain.ops.runtime_resume = 
pm_genpd_runtime_resume; genpd->domain.ops.runtime_idle = pm_generic_runtime_idle; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 2aae623fd840..3a5c5346bc47 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -46,18 +46,34 @@ static int dev_update_qos_constraint(struct device *dev, void *data) bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + unsigned long flags; s64 constraint_ns; dev_dbg(dev, "%s()\n", __func__); - constraint_ns = dev_pm_qos_read_value(dev); + spin_lock_irqsave(&dev->power.lock, flags); + + if (!td->constraint_changed) { + bool ret = td->cached_stop_ok; + + spin_unlock_irqrestore(&dev->power.lock, flags); + return ret; + } + td->constraint_changed = false; + td->cached_stop_ok = false; + td->effective_constraint_ns = -1; + constraint_ns = __dev_pm_qos_read_value(dev); + + spin_unlock_irqrestore(&dev->power.lock, flags); + if (constraint_ns < 0) return false; constraint_ns *= NSEC_PER_USEC; /* * We can walk the children without any additional locking, because - * they all have been suspended at this point. + * they all have been suspended at this point and their + * effective_constraint_ns fields won't be modified in parallel with us. */ if (!dev->power.ignore_children) device_for_each_child(dev, &constraint_ns, @@ -69,11 +85,13 @@ bool default_stop_ok(struct device *dev) return false; } td->effective_constraint_ns = constraint_ns; + td->cached_stop_ok = constraint_ns > td->stop_latency_ns || + constraint_ns == 0; /* * The children have been suspended already, so we don't need to take * their stop latencies into account here. 
*/ - return constraint_ns > td->stop_latency_ns || constraint_ns == 0; + return td->cached_stop_ok; } /** @@ -90,6 +108,25 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) s64 min_dev_off_time_ns; s64 off_on_time_ns; + if (genpd->max_off_time_changed) { + struct gpd_link *link; + + /* + * We have to invalidate the cached results for the masters, so + * use the observation that default_power_down_ok() is not + * going to be called for any master until this instance + * returns. + */ + list_for_each_entry(link, &genpd->slave_links, slave_node) + link->master->max_off_time_changed = true; + + genpd->max_off_time_changed = false; + genpd->cached_power_down_ok = false; + genpd->max_off_time_ns = -1; + } else { + return genpd->cached_power_down_ok; + } + off_on_time_ns = genpd->power_off_latency_ns + genpd->power_on_latency_ns; /* @@ -165,6 +202,8 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) min_dev_off_time_ns = constraint_ns; } + genpd->cached_power_down_ok = true; + /* * If the computed minimum device off time is negative, there are no * latency constraints, so the domain can spend arbitrary time in the diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index e7ada5ccdfc2..1e994eeacdf3 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -14,6 +14,7 @@ #include #include #include +#include enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -71,6 +72,8 @@ struct generic_pm_domain { s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; s64 max_off_time_ns; /* Maximum allowed "suspended" time. 
*/ + bool max_off_time_changed; + bool cached_power_down_ok; struct device_node *of_node; /* Node in device tree */ }; @@ -92,12 +95,16 @@ struct gpd_timing_data { s64 save_state_latency_ns; s64 restore_state_latency_ns; s64 effective_constraint_ns; + bool constraint_changed; + bool cached_stop_ok; }; struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_dev_ops ops; struct gpd_timing_data td; + struct notifier_block nb; + struct mutex lock; bool need_restore; bool always_on; }; -- cgit v1.2.3 From 6237dd132d4eb408ffa80830fe395448e5657ab0 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Wed, 2 May 2012 14:33:37 +0200 Subject: PM / Documentation: suspend-and-cpuhotplug.txt: Fix typo sysfs was expected in this context. Signed-off-by: Marcos Paulo de Souza Acked-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- Documentation/power/suspend-and-cpuhotplug.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index f28f9a6f0347..e13dafc8e8f1 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -29,7 +29,7 @@ More details follow: Write 'mem' to /sys/power/state - syfs file + sysfs file | v Acquire pm_mutex lock -- cgit v1.2.3 From c73893e2ca731b4a81ae59246ab57979aa188777 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 May 2012 21:57:20 +0200 Subject: PM / Sleep: Make the limit of user space wakeup sources configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible to configure out the check against the limit of user space wakeup sources for debugging and default Android builds. Signed-off-by: Rafael J. 
Wysocki Acked-by: Arve Hjønnevåg --- kernel/power/Kconfig | 6 ++++++ kernel/power/wakelock.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 1d534076d33a..08783eda9ce4 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -119,6 +119,12 @@ config PM_WAKELOCKS Allow user space to create, activate and deactivate wakeup source objects with the help of a sysfs-based interface. +config PM_WAKELOCKS_LIMIT + int "Maximum number of user space wakeup sources (0 = no limit)" + range 0 100000 + default 100 + depends on PM_WAKELOCKS + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 579700665e8c..dc34b9d3b7d8 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -17,7 +17,6 @@ #include #include -#define WL_NUMBER_LIMIT 100 #define WL_GC_COUNT_MAX 100 #define WL_GC_TIME_SEC 300 @@ -32,7 +31,6 @@ struct wakelock { static struct rb_root wakelocks_tree = RB_ROOT; static LIST_HEAD(wakelocks_lru_list); -static unsigned int number_of_wakelocks; static unsigned int wakelocks_gc_count; ssize_t pm_show_wakelocks(char *buf, bool show_active) @@ -58,6 +56,29 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) return (str - buf); } +#if CONFIG_PM_WAKELOCKS_LIMIT > 0 +static unsigned int number_of_wakelocks; + +static inline bool wakelocks_limit_exceeded(void) +{ + return number_of_wakelocks > CONFIG_PM_WAKELOCKS_LIMIT; +} + +static inline void increment_wakelocks_number(void) +{ + number_of_wakelocks++; +} + +static inline void decrement_wakelocks_number(void) +{ + number_of_wakelocks--; +} +#else /* CONFIG_PM_WAKELOCKS_LIMIT = 0 */ +static inline bool wakelocks_limit_exceeded(void) { return false; } +static inline void increment_wakelocks_number(void) {} +static inline void decrement_wakelocks_number(void) {} +#endif /* CONFIG_PM_WAKELOCKS_LIMIT */ + static 
struct wakelock *wakelock_lookup_add(const char *name, size_t len, bool add_if_not_found) { @@ -85,7 +106,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, if (!add_if_not_found) return ERR_PTR(-EINVAL); - if (number_of_wakelocks > WL_NUMBER_LIMIT) + if (wakelocks_limit_exceeded()) return ERR_PTR(-ENOSPC); /* Not found, we have to add a new one. */ @@ -103,7 +124,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, rb_link_node(&wl->node, parent, node); rb_insert_color(&wl->node, &wakelocks_tree); list_add(&wl->lru, &wakelocks_lru_list); - number_of_wakelocks++; + increment_wakelocks_number(); return wl; } @@ -175,7 +196,7 @@ static void wakelocks_gc(void) list_del(&wl->lru); kfree(wl->name); kfree(wl); - number_of_wakelocks--; + decrement_wakelocks_number(); } } wakelocks_gc_count = 0; -- cgit v1.2.3 From 4e585d25e120f1eae0a3a8bf8f6ebc7692afec18 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 5 May 2012 21:57:28 +0200 Subject: PM / Sleep: User space wakeup sources garbage collector Kconfig option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible to configure out the user space wakeup sources garbage collector for debugging and default Android builds. Signed-off-by: Rafael J. 
Wysocki Acked-by: Arve Hjønnevåg --- kernel/power/Kconfig | 5 +++ kernel/power/wakelock.c | 101 +++++++++++++++++++++++++++++------------------- 2 files changed, 67 insertions(+), 39 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 08783eda9ce4..8f9b4eb974e0 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -125,6 +125,11 @@ config PM_WAKELOCKS_LIMIT default 100 depends on PM_WAKELOCKS +config PM_WAKELOCKS_GC + bool "Garbage collector for user space wakeup sources" + depends on PM_WAKELOCKS + default y + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index dc34b9d3b7d8..c8fba3380076 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -17,21 +17,18 @@ #include #include -#define WL_GC_COUNT_MAX 100 -#define WL_GC_TIME_SEC 300 - static DEFINE_MUTEX(wakelocks_lock); struct wakelock { char *name; struct rb_node node; struct wakeup_source ws; +#ifdef CONFIG_PM_WAKELOCKS_GC struct list_head lru; +#endif }; static struct rb_root wakelocks_tree = RB_ROOT; -static LIST_HEAD(wakelocks_lru_list); -static unsigned int wakelocks_gc_count; ssize_t pm_show_wakelocks(char *buf, bool show_active) { @@ -79,6 +76,61 @@ static inline void increment_wakelocks_number(void) {} static inline void decrement_wakelocks_number(void) {} #endif /* CONFIG_PM_WAKELOCKS_LIMIT */ +#ifdef CONFIG_PM_WAKELOCKS_GC +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static LIST_HEAD(wakelocks_lru_list); +static unsigned int wakelocks_gc_count; + +static inline void wakelocks_lru_add(struct wakelock *wl) +{ + list_add(&wl->lru, &wakelocks_lru_list); +} + +static inline void wakelocks_lru_most_recent(struct wakelock *wl) +{ + list_move(&wl->lru, &wakelocks_lru_list); +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now; + + if (++wakelocks_gc_count <= WL_GC_COUNT_MAX) + return; + + now = ktime_get(); + 
list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + decrement_wakelocks_number(); + } + } + wakelocks_gc_count = 0; +} +#else /* !CONFIG_PM_WAKELOCKS_GC */ +static inline void wakelocks_lru_add(struct wakelock *wl) {} +static inline void wakelocks_lru_most_recent(struct wakelock *wl) {} +static inline void wakelocks_gc(void) {} +#endif /* !CONFIG_PM_WAKELOCKS_GC */ + static struct wakelock *wakelock_lookup_add(const char *name, size_t len, bool add_if_not_found) { @@ -123,7 +175,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len, wakeup_source_add(&wl->ws); rb_link_node(&wl->node, parent, node); rb_insert_color(&wl->node, &wakelocks_tree); - list_add(&wl->lru, &wakelocks_lru_list); + wakelocks_lru_add(wl); increment_wakelocks_number(); return wl; } @@ -166,42 +218,13 @@ int pm_wake_lock(const char *buf) __pm_stay_awake(&wl->ws); } - list_move(&wl->lru, &wakelocks_lru_list); + wakelocks_lru_most_recent(wl); out: mutex_unlock(&wakelocks_lock); return ret; } -static void wakelocks_gc(void) -{ - struct wakelock *wl, *aux; - ktime_t now = ktime_get(); - - list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { - u64 idle_time_ns; - bool active; - - spin_lock_irq(&wl->ws.lock); - idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); - active = wl->ws.active; - spin_unlock_irq(&wl->ws.lock); - - if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) - break; - - if (!active) { - wakeup_source_remove(&wl->ws); - rb_erase(&wl->node, &wakelocks_tree); - list_del(&wl->lru); - kfree(wl->name); - kfree(wl); - 
decrement_wakelocks_number(); - } - } - wakelocks_gc_count = 0; -} - int pm_wake_unlock(const char *buf) { struct wakelock *wl; @@ -226,9 +249,9 @@ int pm_wake_unlock(const char *buf) goto out; } __pm_relax(&wl->ws); - list_move(&wl->lru, &wakelocks_lru_list); - if (++wakelocks_gc_count > WL_GC_COUNT_MAX) - wakelocks_gc(); + + wakelocks_lru_most_recent(wl); + wakelocks_gc(); out: mutex_unlock(&wakelocks_lock); -- cgit v1.2.3 From 4fcac10d28e7a046120b51a106b19082d2e57401 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 7 May 2012 21:35:45 +0200 Subject: PM / Domains: Fix link checking when add subdomain Current pm_genpd_add_subdomain() will allow duplicated link between master and slave domain. This patch fixed it. Because when current pm_genpd_add_subdomain() checks whether the link between the master and slave generic PM domain already exists, slave_links instead of master_links of master domain is used. Signed-off-by: Huang Ying Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index cde5983de6c2..c3eaa08a8f96 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1448,7 +1448,7 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->slave_links, slave_node) { + list_for_each_entry(link, &genpd->master_links, master_node) { if (link->slave == subdomain && link->master == genpd) { ret = -EINVAL; goto out; -- cgit v1.2.3 From b723b0eb91e08a0ee9a401c0b22c0d52966d9daa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 May 2012 22:00:59 +0200 Subject: PM / Domains: Fix computation of maximum domain off time The default domain power off governor function for generic PM domains, default_power_down_ok(), may violate subdomain maximum off time limit by allowing the master domain to be off for too long. 
Namely, it only finds the minimum of all device maximum off times over the domain's devices and uses that to compute the domain's maximum off time, but it should do the same for the subdomains.
*/ - min_dev_off_time_ns = -1; list_for_each_entry(pdd, &genpd->dev_list, list_node) { struct gpd_timing_data *td; s64 constraint_ns; @@ -197,9 +200,8 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) if (constraint_ns <= off_on_time_ns) return false; - if (min_dev_off_time_ns > constraint_ns - || min_dev_off_time_ns < 0) - min_dev_off_time_ns = constraint_ns; + if (min_off_time_ns > constraint_ns || min_off_time_ns < 0) + min_off_time_ns = constraint_ns; } genpd->cached_power_down_ok = true; @@ -209,16 +211,15 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) * latency constraints, so the domain can spend arbitrary time in the * "off" state. */ - if (min_dev_off_time_ns < 0) + if (min_off_time_ns < 0) return true; /* - * The difference between the computed minimum device off time and the - * time needed to turn the domain on is the maximum theoretical time - * this domain can spend in the "off" state. + * The difference between the computed minimum subdomain or device off + * time and the time needed to turn the domain on is the maximum + * theoretical time this domain can spend in the "off" state. */ - genpd->max_off_time_ns = min_dev_off_time_ns - - genpd->power_on_latency_ns; + genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns; return true; } -- cgit v1.2.3 From 2df83fa4bce421f8176932142f1004adfba0f9dd Mon Sep 17 00:00:00 2001 From: Minho Ban Date: Mon, 14 May 2012 21:45:31 +0200 Subject: PM / Hibernate: Use get_gendisk to verify partition if resume_file is integer format Sometimes resume= parameter comes in integer style (e.g. major:minor) and then name_to_dev_t can not detect partition properly. (especially async device like usb, mmc). This patch calls get_gendisk() if resumewait is true and resume_file is in integer format to work around this problem. Signed-off-by: Minho Ban Signed-off-by: Rafael J. 
Wysocki --- Documentation/kernel-parameters.txt | 2 ++ kernel/power/hibernate.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1601e5a8b71..5900b49323cc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2372,6 +2372,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. resume= [SWSUSP] Specify the partition device for software suspend + Format: + {/dev/ | PARTUUID= | : | } resume_offset= [SWSUSP] Specify the offset from the beginning of the partition diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e09dfbfeecee..8b53db38a279 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include "power.h" @@ -722,6 +724,17 @@ static int software_resume(void) /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); + + /* + * name_to_dev_t is ineffective to verify parition if resume_file is in + * integer format. (e.g. major:minor) + */ + if (isdigit(resume_file[0]) && resume_wait) { + int partno; + while (!get_gendisk(swsusp_resume_device, &partno)) + msleep(10); + } + if (!swsusp_resume_device) { /* * Some device discovery might still be in progress; we need -- cgit v1.2.3 From ca1d72f033d4a89e60db25f680896c76c721062b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 May 2012 21:45:52 +0200 Subject: PM / Domains: Make it possible to add devices to inactive domains The generic PM domains core code currently requires domains to be in the "power on" state for adding devices to them, but this limitation turns out to be inconvenient in some situations, so remove it. 
For this purpose, make __pm_genpd_add_device() set the device's need_restore flag if the domain is in the "power off" state, so that the device's "restore state" (usually .runtime_resume()) callback is executed when it is resumed after the domain has been turned on. If the domain is in the "power on" state, the device's need_restore flag will be cleared by __pm_genpd_add_device(), so that its "save state" (usually .runtime_suspend()) callback is executed when the domain is about to be turned off. However, since that default behavior need not be always desirable, add a helper function pm_genpd_dev_need_restore() allowing a device's need_restore flag to be set/unset at any time. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 27 +++++++++++++++++++++------ include/linux/pm_domain.h | 2 ++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c3eaa08a8f96..83aa694a8efe 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1263,11 +1263,6 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, genpd_acquire_lock(genpd); - if (genpd->status == GPD_STATE_POWER_OFF) { - ret = -EINVAL; - goto out; - } - if (genpd->prepared_count > 0) { ret = -EAGAIN; goto out; @@ -1290,7 +1285,7 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, dev->power.subsys_data->domain_data = &gpd_data->base; gpd_data->base.dev = dev; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); - gpd_data->need_restore = false; + gpd_data->need_restore = genpd->status == GPD_STATE_POWER_OFF; if (td) gpd_data->td = *td; @@ -1417,6 +1412,26 @@ void pm_genpd_dev_always_on(struct device *dev, bool val) } EXPORT_SYMBOL_GPL(pm_genpd_dev_always_on); +/** + * pm_genpd_dev_need_restore - Set/unset the device's "need restore" flag. + * @dev: Device to set/unset the flag for. + * @val: The new value of the device's "need restore" flag. 
+ */ +void pm_genpd_dev_need_restore(struct device *dev, bool val) +{ + struct pm_subsys_data *psd; + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + psd = dev_to_psd(dev); + if (psd && psd->domain_data) + to_gpd_data(psd->domain_data)->need_restore = val; + + spin_unlock_irqrestore(&dev->power.lock, flags); +} +EXPORT_SYMBOL_GPL(pm_genpd_dev_need_restore); + /** * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. * @genpd: Master PM domain to add the subdomain to. diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1e994eeacdf3..30f794eb3826 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -146,6 +146,7 @@ static inline int pm_genpd_of_add_device(struct device_node *genpd_node, extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern void pm_genpd_dev_always_on(struct device *dev, bool val); +extern void pm_genpd_dev_need_restore(struct device *dev, bool val); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, @@ -189,6 +190,7 @@ static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd, return -ENOSYS; } static inline void pm_genpd_dev_always_on(struct device *dev, bool val) {} +static inline void pm_genpd_dev_need_restore(struct device *dev, bool val) {} static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_sd) { -- cgit v1.2.3 From a8159414d7e3af7233e7a5a82d1c5d85379bd75c Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 21 May 2012 21:20:48 +0200 Subject: epoll: Fix user space breakage related to EPOLLWAKEUP Commit 4d7e30d (epoll: Add a flag, EPOLLWAKEUP, to prevent suspend while epoll events are ready) caused some applications to malfunction, because they set the bit corresponding to the new EPOLLWAKEUP flag in their eventpoll flags and they don't have the new CAP_EPOLLWAKEUP capability. To prevent that from happening, change epoll_ctl() to clear EPOLLWAKEUP in epds.events if the caller doesn't have the CAP_EPOLLWAKEUP capability instead of failing and returning an error code, which allows the affected applications to function normally. Reported-and-tested-by: Jiri Slaby Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 2cf0f2153be5..079d1be65ba9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1711,7 +1711,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* Check if EPOLLWAKEUP is allowed */ if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) - goto error_tgt_fput; + epds.events &= ~EPOLLWAKEUP; /* * We have to check that the file structure underneath the file descriptor -- cgit v1.2.3