From 05aa55dddb9ee4045c320661068bea78dad6a6e5 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Thu, 9 Sep 2010 00:46:16 +0200 Subject: PM / Runtime: Lenient generic runtime pm callbacks Allow drivers, that belong to subsystems which use the generic runtime pm callbacks, not to define runtime pm suspend/resume handlers, by implicitly assuming success in such cases. This is needed to eliminate nop handlers that would otherwise be necessary by drivers which enable runtime pm, but don't need to do anything when their devices are runtime-suspended/resumed. Signed-off-by: Ohad Ben-Cohen Acked-by: Kevin Hilman Acked-by: Mark Brown Signed-off-by: Rafael J. Wysocki --- drivers/base/power/generic_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 4b29d4981253..81f2c84697f4 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -46,7 +46,7 @@ int pm_generic_runtime_suspend(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int ret; - ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : -EINVAL; + ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : 0; return ret; } @@ -65,7 +65,7 @@ int pm_generic_runtime_resume(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int ret; - ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : -EINVAL; + ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : 0; return ret; } -- cgit v1.2.3 From f996fc9671d088bd5f52a70f18c64bfe3d0e418f Mon Sep 17 00:00:00 2001 From: Bojan Smojver Date: Thu, 9 Sep 2010 23:06:23 +0200 Subject: PM / Hibernate: Compress hibernation image with LZO Compress hibernation image with LZO in order to save on I/O and therefore time to hibernate/thaw. [rjw: Added hibernate=nocompress command line option instead of just nocompress which would be confusing, fixed a couple of compiler warnings, fixed kerneldoc comments, minor cleanups.] Signed-off-by: Bojan Smojver Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 5 + Documentation/power/swsusp.txt | 3 +- kernel/power/Kconfig | 2 + kernel/power/hibernate.c | 13 ++ kernel/power/power.h | 1 + kernel/power/swap.c | 290 +++++++++++++++++++++++++++++++++++- 6 files changed, 306 insertions(+), 8 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8dd7248508a9..2c98b18864c5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2165,6 +2165,11 @@ and is between 256 and 4096 characters. It is defined in the file in units (needed only for swap files). See Documentation/power/swsusp-and-swap-files.txt + hibernate= [HIBERNATION] + noresume Don't check if there's a hibernation image + present during boot. + nocompress Don't compress/decompress hibernation images. + retain_initrd [RAM] Keep initrd memory after extraction rhash_entries= [KNL,NET] diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index 9d60ab717a7b..ea718891a665 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -66,7 +66,8 @@ swsusp saves the state of the machine into active swaps and then reboots or powerdowns. You must explicitly specify the swap partition to resume from with ``resume='' kernel option. If signature is found it loads and restores saved state. If the option ``noresume'' is specified as a boot parameter, it skips -the resuming. +the resuming. If the option ``hibernate=nocompress'' is specified as a boot +parameter, it saves hibernation image without compression. In the meantime while the system is suspended you should not add/remove any of the hardware, write to the filesystems, etc. diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index ca6066a6952e..cb57eb99215f 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -137,6 +137,8 @@ config SUSPEND_FREEZER config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE + select LZO_COMPRESS + select LZO_DECOMPRESS select SUSPEND_NVS if HAS_IOMEM ---help--- Enable the suspend to disk (STD) functionality, which is usually diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8dc31e02ae12..6c9c9dc48c75 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -29,6 +29,7 @@ #include "power.h" +static int nocompress = 0; static int noresume = 0; static char resume_file[256] = CONFIG_PM_STD_PARTITION; dev_t swsusp_resume_device; @@ -638,6 +639,8 @@ int hibernate(void) if (hibernation_mode == HIBERNATION_PLATFORM) flags |= SF_PLATFORM_MODE; + if (nocompress) + flags |= SF_NOCOMPRESS_MODE; pr_debug("PM: writing image.\n"); error = swsusp_write(flags); swsusp_free(); @@ -1004,6 +1007,15 @@ static int __init resume_offset_setup(char *str) return 1; } +static int __init hibernate_setup(char *str) +{ + if (!strncmp(str, "noresume", 8)) + noresume = 1; + else if (!strncmp(str, "nocompress", 10)) + nocompress = 1; + return 1; +} + static int __init noresume_setup(char *str) { noresume = 1; @@ -1013,3 +1025,4 @@ static int __init noresume_setup(char *str) __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); +__setup("hibernate=", hibernate_setup); diff --git a/kernel/power/power.h b/kernel/power/power.h index 006270fe382d..c7e42e47eb0b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -134,6 +134,7 @@ extern int swsusp_swap_in_use(void); * the image header. */ #define SF_PLATFORM_MODE 1 +#define SF_NOCOMPRESS_MODE 2 /* kernel/power/hibernate.c */ extern int swsusp_check(void); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index e6a5bdf61a37..3dc0552cddfb 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include "power.h" @@ -357,6 +359,18 @@ static int swap_writer_finish(struct swap_map_handle *handle, return error; } +/* We need to remember how much compressed data we need to read. */ +#define LZO_HEADER sizeof(size_t) + +/* Number of pages/bytes we'll compress at one time. */ +#define LZO_UNC_PAGES 32 +#define LZO_UNC_SIZE (LZO_UNC_PAGES * PAGE_SIZE) + +/* Number of pages/bytes we need for compressed data (worst case). */ +#define LZO_CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \ + LZO_HEADER, PAGE_SIZE) +#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE) + /** * save_image - save the suspend image data */ @@ -404,6 +418,137 @@ static int save_image(struct swap_map_handle *handle, return ret; } + +/** + * save_image_lzo - Save the suspend image data compressed with LZO. + * @handle: Swap mam handle to use for saving the image. + * @snapshot: Image to read data from. + * @nr_to_write: Number of pages to save. + */ +static int save_image_lzo(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_write) +{ + unsigned int m; + int ret = 0; + int nr_pages; + int err2; + struct bio *bio; + struct timeval start; + struct timeval stop; + size_t off, unc_len, cmp_len; + unsigned char *unc, *cmp, *wrk, *page; + + page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + return -ENOMEM; + } + + wrk = vmalloc(LZO1X_1_MEM_COMPRESS); + if (!wrk) { + printk(KERN_ERR "PM: Failed to allocate LZO workspace\n"); + free_page((unsigned long)page); + return -ENOMEM; + } + + unc = vmalloc(LZO_UNC_SIZE); + if (!unc) { + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); + vfree(wrk); + free_page((unsigned long)page); + return -ENOMEM; + } + + cmp = vmalloc(LZO_CMP_SIZE); + if (!cmp) { + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); + vfree(unc); + vfree(wrk); + free_page((unsigned long)page); + return -ENOMEM; + } + + printk(KERN_INFO + "PM: Compressing and saving image data (%u pages) ... ", + nr_to_write); + m = nr_to_write / 100; + if (!m) + m = 1; + nr_pages = 0; + bio = NULL; + do_gettimeofday(&start); + for (;;) { + for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { + ret = snapshot_read_next(snapshot); + if (ret < 0) + goto out_finish; + + if (!ret) + break; + + memcpy(unc + off, data_of(*snapshot), PAGE_SIZE); + + if (!(nr_pages % m)) + printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + + if (!off) + break; + + unc_len = off; + ret = lzo1x_1_compress(unc, unc_len, + cmp + LZO_HEADER, &cmp_len, wrk); + if (ret < 0) { + printk(KERN_ERR "PM: LZO compression failed\n"); + break; + } + + if (unlikely(!cmp_len || + cmp_len > lzo1x_worst_compress(unc_len))) { + printk(KERN_ERR "PM: Invalid LZO compressed length\n"); + ret = -1; + break; + } + + *(size_t *)cmp = cmp_len; + + /* + * Given we are writing one page at a time to disk, we copy + * that much from the buffer, although the last bit will likely + * be smaller than full page. This is OK - we saved the length + * of the compressed data, so any garbage at the end will be + * discarded when we read it. + */ + for (off = 0; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { + memcpy(page, cmp + off, PAGE_SIZE); + + ret = swap_write_page(handle, page, &bio); + if (ret) + goto out_finish; + } + } + +out_finish: + err2 = hib_wait_on_bio_chain(&bio); + do_gettimeofday(&stop); + if (!ret) + ret = err2; + if (!ret) + printk(KERN_CONT "\b\b\b\bdone\n"); + else + printk(KERN_CONT "\n"); + swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); + + vfree(cmp); + vfree(unc); + vfree(wrk); + free_page((unsigned long)page); + + return ret; +} + /** * enough_swap - Make sure we have enough swap to save the image. * @@ -411,12 +556,16 @@ static int save_image(struct swap_map_handle *handle, * space avaiable from the resume partition. */ -static int enough_swap(unsigned int nr_pages) +static int enough_swap(unsigned int nr_pages, unsigned int flags) { unsigned int free_swap = count_swap_pages(root_swap, 1); + unsigned int required; pr_debug("PM: Free swap pages: %u\n", free_swap); - return free_swap > nr_pages + PAGES_FOR_IO; + + required = PAGES_FOR_IO + ((flags & SF_NOCOMPRESS_MODE) ? + nr_pages : (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + 1); + return free_swap > required; } /** @@ -443,7 +592,7 @@ int swsusp_write(unsigned int flags) printk(KERN_ERR "PM: Cannot get swap writer\n"); return error; } - if (!enough_swap(pages)) { + if (!enough_swap(pages, flags)) { printk(KERN_ERR "PM: Not enough free swap\n"); error = -ENOSPC; goto out_finish; @@ -458,8 +607,11 @@ int swsusp_write(unsigned int flags) } header = (struct swsusp_info *)data_of(snapshot); error = swap_write_page(&handle, header, NULL); - if (!error) - error = save_image(&handle, &snapshot, pages - 1); + if (!error) { + error = (flags & SF_NOCOMPRESS_MODE) ? + save_image(&handle, &snapshot, pages - 1) : + save_image_lzo(&handle, &snapshot, pages - 1); + } out_finish: error = swap_writer_finish(&handle, flags, error); return error; @@ -589,6 +741,127 @@ static int load_image(struct swap_map_handle *handle, return error; } +/** + * load_image_lzo - Load compressed image data and decompress them with LZO. + * @handle: Swap map handle to use for loading data. + * @snapshot: Image to copy uncompressed data into. + * @nr_to_read: Number of pages to load. + */ +static int load_image_lzo(struct swap_map_handle *handle, + struct snapshot_handle *snapshot, + unsigned int nr_to_read) +{ + unsigned int m; + int error = 0; + struct timeval start; + struct timeval stop; + unsigned nr_pages; + size_t off, unc_len, cmp_len; + unsigned char *unc, *cmp, *page; + + page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + if (!page) { + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + return -ENOMEM; + } + + unc = vmalloc(LZO_UNC_SIZE); + if (!unc) { + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); + free_page((unsigned long)page); + return -ENOMEM; + } + + cmp = vmalloc(LZO_CMP_SIZE); + if (!cmp) { + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); + vfree(unc); + free_page((unsigned long)page); + return -ENOMEM; + } + + printk(KERN_INFO + "PM: Loading and decompressing image data (%u pages) ... ", + nr_to_read); + m = nr_to_read / 100; + if (!m) + m = 1; + nr_pages = 0; + do_gettimeofday(&start); + + error = snapshot_write_next(snapshot); + if (error <= 0) + goto out_finish; + + for (;;) { + error = swap_read_page(handle, page, NULL); /* sync */ + if (error) + break; + + cmp_len = *(size_t *)page; + if (unlikely(!cmp_len || + cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { + printk(KERN_ERR "PM: Invalid LZO compressed length\n"); + error = -1; + break; + } + + memcpy(cmp, page, PAGE_SIZE); + for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { + error = swap_read_page(handle, page, NULL); /* sync */ + if (error) + goto out_finish; + + memcpy(cmp + off, page, PAGE_SIZE); + } + + unc_len = LZO_UNC_SIZE; + error = lzo1x_decompress_safe(cmp + LZO_HEADER, cmp_len, + unc, &unc_len); + if (error < 0) { + printk(KERN_ERR "PM: LZO decompression failed\n"); + break; + } + + if (unlikely(!unc_len || + unc_len > LZO_UNC_SIZE || + unc_len & (PAGE_SIZE - 1))) { + printk(KERN_ERR "PM: Invalid LZO uncompressed length\n"); + error = -1; + break; + } + + for (off = 0; off < unc_len; off += PAGE_SIZE) { + memcpy(data_of(*snapshot), unc + off, PAGE_SIZE); + + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + + error = snapshot_write_next(snapshot); + if (error <= 0) + goto out_finish; + } + } + +out_finish: + do_gettimeofday(&stop); + if (!error) { + printk("\b\b\b\bdone\n"); + snapshot_write_finalize(snapshot); + if (!snapshot_image_loaded(snapshot)) + error = -ENODATA; + } else + printk("\n"); + swsusp_show_speed(&start, &stop, nr_to_read, "Read"); + + vfree(cmp); + vfree(unc); + free_page((unsigned long)page); + + return error; +} + /** * swsusp_read - read the hibernation image. * @flags_p: flags passed by the "frozen" kernel in the image header should @@ -612,8 +885,11 @@ int swsusp_read(unsigned int *flags_p) goto end; if (!error) error = swap_read_page(&handle, header, NULL); - if (!error) - error = load_image(&handle, &snapshot, header->pages - 1); + if (!error) { + error = (*flags_p & SF_NOCOMPRESS_MODE) ? + load_image(&handle, &snapshot, header->pages - 1) : + load_image_lzo(&handle, &snapshot, header->pages - 1); + } swap_reader_finish(&handle); end: if (!error) -- cgit v1.2.3 From ede890c2c069d611ece0e184103a6b9236ce416a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 12 Sep 2010 21:40:01 +0200 Subject: PM: Fix unmet dependency warning from kconfig Fix the following build warning: warning: (PM_SLEEP_SMP && SMP && (ARCH_SUSPEND_POSSIBLE || \ ARCH_HIBERNATION_POSSIBLE) && PM_SLEEP) selects HOTPLUG_CPU which \ has unmet direct dependencies (SMP && HOTPLUG) by selecting HOTPLUG along with CPU_HOTPLUG. Signed-off-by: Rafael J. Wysocki Acked-by: Randy Dunlap --- kernel/power/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index cb57eb99215f..e45894c696ee 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -86,6 +86,7 @@ config PM_SLEEP_SMP depends on SMP depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE depends on PM_SLEEP + select HOTPLUG select HOTPLUG_CPU default y -- cgit v1.2.3 From bcb5ba8b4e8a5ae14b27351bdf499dd4c3bcc944 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 20 Sep 2010 19:44:17 +0200 Subject: PM / Runtime: Use alloc_workqueue() for creating the PM workqueue Although we need the PM workqueue to be freezable, we don't need it to be singlethread. Also, the number of concurrent work items running on a single CPU need not be constrained. For these reasons use alloc_workqueue() directly, with suitable arguments, instead of create_freezeable_workqueue(), to create the runtime PM workqueue. Signed-off-by: Rafael J. Wysocki Acked-by: Tejun Heo --- kernel/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 62b0bc6e4983..0a28d4db3597 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -308,7 +308,7 @@ EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { - pm_wq = create_freezeable_workqueue("pm"); + pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); return pm_wq ? 0 : -ENOMEM; } -- cgit v1.2.3 From 266f1a25eff5ff98c498d7754a419aacfd88f71c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 20 Sep 2010 19:44:38 +0200 Subject: PM / Hibernate: Improve comments in hibernate_preallocate_memory() One comment in hibernate_preallocate_memory() is wrong, so fix it and add one more comment to clarify the meaning of the fixed one. Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d3f795f01bbc..d9191b40cf6e 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1318,12 +1318,14 @@ int hibernate_preallocate_memory(void) /* Compute the maximum number of saveable pages to leave in memory. */ max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; + /* Compute the desired number of image pages specified by image_size. */ size = DIV_ROUND_UP(image_size, PAGE_SIZE); if (size > max_size) size = max_size; /* - * If the maximum is not less than the current number of saveable pages - * in memory, allocate page frames for the image and we're done. + * If the desired number of image pages is at least as large as the + * current number of saveable pages in memory, allocate page frames for + * the image and we're done. */ if (size >= saveable) { pages = preallocate_image_highmem(save_highmem); -- cgit v1.2.3 From ac5c24ec1e983313ef0015258fba6f630e54e7cf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 20 Sep 2010 19:44:56 +0200 Subject: PM / Hibernate: Make default image size depend on total RAM size The default hibernation image size is currently hard coded and euqal to 500 MB, which is not a reasonable default on many contemporary systems. Make it equal 2/5 of the total RAM size (this is slightly below the maximum, i.e. 1/2 of the total RAM size, and seems to be generally suitable). Signed-off-by: Rafael J. Wysocki Tested-by: M. Vefa Bicakci --- Documentation/power/interface.txt | 2 +- kernel/power/main.c | 1 + kernel/power/power.h | 9 ++++++++- kernel/power/snapshot.c | 7 ++++++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index e67211fe0ee2..c537834af005 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -57,7 +57,7 @@ smallest image possible. In particular, if "0" is written to this file, the suspend image will be as small as possible. Reading from this file will display the current image size limit, which -is set to 500 MB by default. +is set to 2/5 of available RAM by default. /sys/power/pm_trace controls the code which saves the last PM event point in the RTC across reboots, so that you can debug a machine that just hangs diff --git a/kernel/power/main.c b/kernel/power/main.c index 0a28d4db3597..f06ad6eff37a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -321,6 +321,7 @@ static int __init pm_init(void) int error = pm_start_workqueue(); if (error) return error; + hibernate_image_size_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; diff --git a/kernel/power/power.h b/kernel/power/power.h index c7e42e47eb0b..03634be55f62 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -14,6 +14,9 @@ struct swsusp_info { } __attribute__((aligned(PAGE_SIZE))); #ifdef CONFIG_HIBERNATION +/* kernel/power/snapshot.c */ +extern void __init hibernate_image_size_init(void); + #ifdef CONFIG_ARCH_HIBERNATION_HEADER /* Maximum size of architecture specific data in a hibernation header */ #define MAX_ARCH_HEADER_SIZE (sizeof(struct new_utsname) + 4) @@ -49,7 +52,11 @@ static inline char *check_image_kernel(struct swsusp_info *info) extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); -#endif + +#else /* !CONFIG_HIBERNATION */ + +static inline void hibernate_image_size_init(void) {} +#endif /* !CONFIG_HIBERNATION */ extern int pfn_is_nosave(unsigned long); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d9191b40cf6e..ac7eb109f196 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -46,7 +46,12 @@ static void swsusp_unset_page_forbidden(struct page *); * size will not exceed N bytes, but if that is impossible, it will * try to create the smallest image possible. */ -unsigned long image_size = 500 * 1024 * 1024; +unsigned long image_size; + +void __init hibernate_image_size_init(void) +{ + image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; +} /* List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been -- cgit v1.2.3 From 0702d9ee0f1dcb6258789032f03b3208bfafaffc Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Mon, 20 Sep 2010 22:32:10 +0200 Subject: PM: Fix signed/unsigned warning in dpm_show_time() Seen on MIPS32, gcc 4.4.3, 2.6.36-rc4: drivers/base/power/main.c: In function 'dpm_show_time': drivers/base/power/main.c:415: warning: comparison of distinct pointer types lacks a cast do_div() takes unsigned parameters: uint32_t do_div(uint64_t *n, uint32_t base); Using an unsigned variable for usecs64 should not cause any problems, because calltime >= starttime . Signed-off-by: Kevin Cernekee Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 276d5a701dc3..db677199403c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -407,7 +407,7 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info, static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) { ktime_t calltime; - s64 usecs64; + u64 usecs64; int usecs; calltime = ktime_get(); -- cgit v1.2.3 From 074037ec79bea73edf1b1ec72fef1010e83e3cc5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Sep 2010 22:09:10 +0200 Subject: PM / Wakeup: Introduce wakeup source objects and event statistics (v3) Introduce struct wakeup_source for representing system wakeup sources within the kernel and for collecting statistics related to them. Make the recently introduced helper functions pm_wakeup_event(), pm_stay_awake() and pm_relax() use struct wakeup_source objects internally, so that wakeup statistics associated with wakeup devices can be collected and reported in a consistent way (the definition of pm_relax() is changed, which is harmless, because this function is not called directly by anyone yet). Introduce new wakeup-related sysfs device attributes in /sys/devices/.../power for reporting the device wakeup statistics. Change the global wakeup events counters event_count and events_in_progress into atomic variables, so that it is not necessary to acquire a global spinlock in pm_wakeup_event(), pm_stay_awake() and pm_relax(), which should allow us to avoid lock contention in these functions on SMP systems with many wakeup devices. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-devices-power | 70 ++++ drivers/base/power/main.c | 4 +- drivers/base/power/power.h | 1 + drivers/base/power/runtime.c | 2 - drivers/base/power/sysfs.c | 121 +++++- drivers/base/power/wakeup.c | 528 ++++++++++++++++++++++---- include/linux/pm.h | 16 +- include/linux/pm_wakeup.h | 127 +++++-- include/linux/suspend.h | 4 +- kernel/power/main.c | 8 +- 10 files changed, 756 insertions(+), 125 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 6123c523bfd7..6bb2dd3c3a71 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -77,3 +77,73 @@ Description: devices this attribute is set to "enabled" by bus type code or device drivers and in that cases it should be safe to leave the default value. + +What: /sys/devices/.../power/wakeup_count +Date: September 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_count attribute contains the number + of signaled wakeup events associated with the device. This + attribute is read-only. If the device is not enabled to wake up + the system from sleep states, this attribute is empty. + +What: /sys/devices/.../power/wakeup_active_count +Date: September 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_active_count attribute contains the + number of times the processing of wakeup events associated with + the device was completed (at the kernel level). This attribute + is read-only. If the device is not enabled to wake up the + system from sleep states, this attribute is empty. + +What: /sys/devices/.../power/wakeup_hit_count +Date: September 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_hit_count attribute contains the + number of times the processing of a wakeup event associated with + the device might prevent the system from entering a sleep state. + This attribute is read-only. If the device is not enabled to + wake up the system from sleep states, this attribute is empty. + +What: /sys/devices/.../power/wakeup_active +Date: September 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_active attribute contains either 1, + or 0, depending on whether or not a wakeup event associated with + the device is being processed (1). This attribute is read-only. + If the device is not enabled to wake up the system from sleep + states, this attribute is empty. + +What: /sys/devices/.../power/wakeup_total_time_ms +Date: September 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_total_time_ms attribute contains + the total time of processing wakeup events associated with the + device, in milliseconds. This attribute is read-only. If the + device is not enabled to wake up the system from sleep states, + this attribute is empty. + +What: /sys/devices/.../power/wakeup_max_time_ms +Date: September 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_max_time_ms attribute contains + the maximum time of processing a single wakeup event associated + with the device, in milliseconds. This attribute is read-only. + If the device is not enabled to wake up the system from sleep + states, this attribute is empty. + +What: /sys/devices/.../power/wakeup_last_time_ms +Date: September 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../wakeup_last_time_ms attribute contains + the value of the monotonic clock corresponding to the time of + signaling the last wakeup event associated with the device, in + milliseconds. This attribute is read-only. If the device is + not enabled to wake up the system from sleep states, this + attribute is empty. diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index db677199403c..7ae6fe414c38 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -60,7 +60,8 @@ void device_pm_init(struct device *dev) dev->power.status = DPM_ON; init_completion(&dev->power.completion); complete_all(&dev->power.completion); - dev->power.wakeup_count = 0; + dev->power.wakeup = NULL; + spin_lock_init(&dev->power.lock); pm_runtime_init(dev); } @@ -120,6 +121,7 @@ void device_pm_remove(struct device *dev) mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); + device_wakeup_disable(dev); pm_runtime_remove(dev); } diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index c0bd03c83b9c..8b2745c69e2a 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -34,6 +34,7 @@ extern void device_pm_move_last(struct device *); static inline void device_pm_init(struct device *dev) { + spin_lock_init(&dev->power.lock); pm_runtime_init(dev); } diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index b78c401ffa73..e9520ad2d716 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1099,8 +1099,6 @@ EXPORT_SYMBOL_GPL(pm_runtime_allow); */ void pm_runtime_init(struct device *dev) { - spin_lock_init(&dev->power.lock); - dev->power.runtime_status = RPM_SUSPENDED; dev->power.idle_notification = false; diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index e56b4388fe61..8859780817e1 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -210,11 +210,122 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); static ssize_t wakeup_count_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%lu\n", dev->power.wakeup_count); + unsigned long count = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->event_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL); -#endif + +static ssize_t wakeup_active_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long count = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->active_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL); + +static ssize_t wakeup_hit_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long count = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->hit_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL); + +static ssize_t wakeup_active_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int active = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + active = dev->power.wakeup->active; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%u\n", active) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_active, 0444, wakeup_active_show, NULL); + +static ssize_t wakeup_total_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->total_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_total_time_ms, 0444, wakeup_total_time_show, NULL); + +static ssize_t wakeup_max_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->max_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_max_time_ms, 0444, wakeup_max_time_show, NULL); + +static ssize_t wakeup_last_time_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->last_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); +#endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG #ifdef CONFIG_PM_RUNTIME @@ -288,6 +399,12 @@ static struct attribute * power_attrs[] = { &dev_attr_wakeup.attr, #ifdef CONFIG_PM_SLEEP &dev_attr_wakeup_count.attr, + &dev_attr_wakeup_active_count.attr, + &dev_attr_wakeup_hit_count.attr, + &dev_attr_wakeup_active.attr, + &dev_attr_wakeup_total_time_ms.attr, + &dev_attr_wakeup_max_time_ms.attr, + &dev_attr_wakeup_last_time_ms.attr, #endif #ifdef CONFIG_PM_ADVANCED_DEBUG &dev_attr_async.attr, diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index eb594facfc3f..03751a01dbad 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -11,7 +11,10 @@ #include #include #include -#include + +#include "power.h" + +#define TIMEOUT 100 /* * If set, the suspend/hibernate code will abort transitions to a sleep state @@ -20,18 +23,244 @@ bool events_check_enabled; /* The counter of registered wakeup events. */ -static unsigned long event_count; +static atomic_t event_count = ATOMIC_INIT(0); /* A preserved old value of event_count. */ -static unsigned long saved_event_count; +static unsigned int saved_count; /* The counter of wakeup events being processed. */ -static unsigned long events_in_progress; +static atomic_t events_in_progress = ATOMIC_INIT(0); static DEFINE_SPINLOCK(events_lock); static void pm_wakeup_timer_fn(unsigned long data); -static DEFINE_TIMER(events_timer, pm_wakeup_timer_fn, 0, 0); -static unsigned long events_timer_expires; +static LIST_HEAD(wakeup_sources); + +/** + * wakeup_source_create - Create a struct wakeup_source object. + * @name: Name of the new wakeup source. + */ +struct wakeup_source *wakeup_source_create(const char *name) +{ + struct wakeup_source *ws; + + ws = kzalloc(sizeof(*ws), GFP_KERNEL); + if (!ws) + return NULL; + + spin_lock_init(&ws->lock); + if (name) + ws->name = kstrdup(name, GFP_KERNEL); + + return ws; +} +EXPORT_SYMBOL_GPL(wakeup_source_create); + +/** + * wakeup_source_destroy - Destroy a struct wakeup_source object. + * @ws: Wakeup source to destroy. + */ +void wakeup_source_destroy(struct wakeup_source *ws) +{ + if (!ws) + return; + + spin_lock_irq(&ws->lock); + while (ws->active) { + spin_unlock_irq(&ws->lock); + + schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); + + spin_lock_irq(&ws->lock); + } + spin_unlock_irq(&ws->lock); + + kfree(ws->name); + kfree(ws); +} +EXPORT_SYMBOL_GPL(wakeup_source_destroy); + +/** + * wakeup_source_add - Add given object to the list of wakeup sources. + * @ws: Wakeup source object to add to the list. + */ +void wakeup_source_add(struct wakeup_source *ws) +{ + if (WARN_ON(!ws)) + return; + + setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws); + ws->active = false; + + spin_lock_irq(&events_lock); + list_add_rcu(&ws->entry, &wakeup_sources); + spin_unlock_irq(&events_lock); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(wakeup_source_add); + +/** + * wakeup_source_remove - Remove given object from the wakeup sources list. + * @ws: Wakeup source object to remove from the list. + */ +void wakeup_source_remove(struct wakeup_source *ws) +{ + if (WARN_ON(!ws)) + return; + + spin_lock_irq(&events_lock); + list_del_rcu(&ws->entry); + spin_unlock_irq(&events_lock); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(wakeup_source_remove); + +/** + * wakeup_source_register - Create wakeup source and add it to the list. + * @name: Name of the wakeup source to register. + */ +struct wakeup_source *wakeup_source_register(const char *name) +{ + struct wakeup_source *ws; + + ws = wakeup_source_create(name); + if (ws) + wakeup_source_add(ws); + + return ws; +} +EXPORT_SYMBOL_GPL(wakeup_source_register); + +/** + * wakeup_source_unregister - Remove wakeup source from the list and remove it. + * @ws: Wakeup source object to unregister. + */ +void wakeup_source_unregister(struct wakeup_source *ws) +{ + wakeup_source_remove(ws); + wakeup_source_destroy(ws); +} +EXPORT_SYMBOL_GPL(wakeup_source_unregister); + +/** + * device_wakeup_attach - Attach a wakeup source object to a device object. + * @dev: Device to handle. + * @ws: Wakeup source object to attach to @dev. + * + * This causes @dev to be treated as a wakeup device. + */ +static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws) +{ + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + spin_unlock_irq(&dev->power.lock); + return -EEXIST; + } + dev->power.wakeup = ws; + spin_unlock_irq(&dev->power.lock); + return 0; +} + +/** + * device_wakeup_enable - Enable given device to be a wakeup source. + * @dev: Device to handle. + * + * Create a wakeup source object, register it and attach it to @dev. + */ +int device_wakeup_enable(struct device *dev) +{ + struct wakeup_source *ws; + int ret; + + if (!dev || !dev->power.can_wakeup) + return -EINVAL; + + ws = wakeup_source_register(dev_name(dev)); + if (!ws) + return -ENOMEM; + + ret = device_wakeup_attach(dev, ws); + if (ret) + wakeup_source_unregister(ws); + + return ret; +} +EXPORT_SYMBOL_GPL(device_wakeup_enable); + +/** + * device_wakeup_detach - Detach a device's wakeup source object from it. + * @dev: Device to detach the wakeup source object from. + * + * After it returns, @dev will not be treated as a wakeup device any more. + */ +static struct wakeup_source *device_wakeup_detach(struct device *dev) +{ + struct wakeup_source *ws; + + spin_lock_irq(&dev->power.lock); + ws = dev->power.wakeup; + dev->power.wakeup = NULL; + spin_unlock_irq(&dev->power.lock); + return ws; +} + +/** + * device_wakeup_disable - Do not regard a device as a wakeup source any more. + * @dev: Device to handle. + * + * Detach the @dev's wakeup source object from it, unregister this wakeup source + * object and destroy it. + */ +int device_wakeup_disable(struct device *dev) +{ + struct wakeup_source *ws; + + if (!dev || !dev->power.can_wakeup) + return -EINVAL; + + ws = device_wakeup_detach(dev); + if (ws) + wakeup_source_unregister(ws); + + return 0; +} +EXPORT_SYMBOL_GPL(device_wakeup_disable); + +/** + * device_init_wakeup - Device wakeup initialization. + * @dev: Device to handle. + * @enable: Whether or not to enable @dev as a wakeup device. + * + * By default, most devices should leave wakeup disabled. The exceptions are + * devices that everyone expects to be wakeup sources: keyboards, power buttons, + * possibly network interfaces, etc. + */ +int device_init_wakeup(struct device *dev, bool enable) +{ + int ret = 0; + + if (enable) { + device_set_wakeup_capable(dev, true); + ret = device_wakeup_enable(dev); + } else { + device_set_wakeup_capable(dev, false); + } + + return ret; +} +EXPORT_SYMBOL_GPL(device_init_wakeup); + +/** + * device_set_wakeup_enable - Enable or disable a device to wake up the system. + * @dev: Device to handle. + */ +int device_set_wakeup_enable(struct device *dev, bool enable) +{ + if (!dev || !dev->power.can_wakeup) + return -EINVAL; + + return enable ? device_wakeup_enable(dev) : device_wakeup_disable(dev); +} +EXPORT_SYMBOL_GPL(device_set_wakeup_enable); /* * The functions below use the observation that each wakeup event starts a @@ -55,118 +284,259 @@ static unsigned long events_timer_expires; * knowledge, however, may not be available to it, so it can simply specify time * to wait before the system can be suspended and pass it as the second * argument of pm_wakeup_event(). + * + * It is valid to call pm_relax() after pm_wakeup_event(), in which case the + * "no suspend" period will be ended either by the pm_relax(), or by the timer + * function executed when the timer expires, whichever comes first. */ +/** + * wakup_source_activate - Mark given wakeup source as active. + * @ws: Wakeup source to handle. + * + * Update the @ws' statistics and, if @ws has just been activated, notify the PM + * core of the event by incrementing the counter of of wakeup events being + * processed. + */ +static void wakeup_source_activate(struct wakeup_source *ws) +{ + ws->active = true; + ws->active_count++; + ws->timer_expires = jiffies; + ws->last_time = ktime_get(); + + atomic_inc(&events_in_progress); +} + +/** + * __pm_stay_awake - Notify the PM core of a wakeup event. + * @ws: Wakeup source object associated with the source of the event. + * + * It is safe to call this function from interrupt context. + */ +void __pm_stay_awake(struct wakeup_source *ws) +{ + unsigned long flags; + + if (!ws) + return; + + spin_lock_irqsave(&ws->lock, flags); + ws->event_count++; + if (!ws->active) + wakeup_source_activate(ws); + spin_unlock_irqrestore(&ws->lock, flags); +} +EXPORT_SYMBOL_GPL(__pm_stay_awake); + /** * pm_stay_awake - Notify the PM core that a wakeup event is being processed. * @dev: Device the wakeup event is related to. * - * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the - * counter of wakeup events being processed. If @dev is not NULL, the counter - * of wakeup events related to @dev is incremented too. + * Notify the PM core of a wakeup event (signaled by @dev) by calling + * __pm_stay_awake for the @dev's wakeup source object. * * Call this function after detecting of a wakeup event if pm_relax() is going * to be called directly after processing the event (and possibly passing it to * user space for further processing). - * - * It is safe to call this function from interrupt context. */ void pm_stay_awake(struct device *dev) { unsigned long flags; - spin_lock_irqsave(&events_lock, flags); - if (dev) - dev->power.wakeup_count++; + if (!dev) + return; - events_in_progress++; - spin_unlock_irqrestore(&events_lock, flags); + spin_lock_irqsave(&dev->power.lock, flags); + __pm_stay_awake(dev->power.wakeup); + spin_unlock_irqrestore(&dev->power.lock, flags); } +EXPORT_SYMBOL_GPL(pm_stay_awake); /** - * pm_relax - Notify the PM core that processing of a wakeup event has ended. + * wakup_source_deactivate - Mark given wakeup source as inactive. + * @ws: Wakeup source to handle. * - * Notify the PM core that a wakeup event has been processed by decrementing - * the counter of wakeup events being processed and incrementing the counter - * of registered wakeup events. + * Update the @ws' statistics and notify the PM core that the wakeup source has + * become inactive by decrementing the counter of wakeup events being processed + * and incrementing the counter of registered wakeup events. + */ +static void wakeup_source_deactivate(struct wakeup_source *ws) +{ + ktime_t duration; + ktime_t now; + + ws->relax_count++; + /* + * __pm_relax() may be called directly or from a timer function. + * If it is called directly right after the timer function has been + * started, but before the timer function calls __pm_relax(), it is + * possible that __pm_stay_awake() will be called in the meantime and + * will set ws->active. Then, ws->active may be cleared immediately + * by the __pm_relax() called from the timer function, but in such a + * case ws->relax_count will be different from ws->active_count. + */ + if (ws->relax_count != ws->active_count) { + ws->relax_count--; + return; + } + + ws->active = false; + + now = ktime_get(); + duration = ktime_sub(now, ws->last_time); + ws->total_time = ktime_add(ws->total_time, duration); + if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time)) + ws->max_time = duration; + + del_timer(&ws->timer); + + /* + * event_count has to be incremented before events_in_progress is + * modified, so that the callers of pm_check_wakeup_events() and + * pm_save_wakeup_count() don't see the old value of event_count and + * events_in_progress equal to zero at the same time. + */ + atomic_inc(&event_count); + smp_mb__before_atomic_dec(); + atomic_dec(&events_in_progress); +} + +/** + * __pm_relax - Notify the PM core that processing of a wakeup event has ended. + * @ws: Wakeup source object associated with the source of the event. * * Call this function for wakeup events whose processing started with calling - * pm_stay_awake(). + * __pm_stay_awake(). * * It is safe to call it from interrupt context. */ -void pm_relax(void) +void __pm_relax(struct wakeup_source *ws) { unsigned long flags; - spin_lock_irqsave(&events_lock, flags); - if (events_in_progress) { - events_in_progress--; - event_count++; - } - spin_unlock_irqrestore(&events_lock, flags); + if (!ws) + return; + + spin_lock_irqsave(&ws->lock, flags); + if (ws->active) + wakeup_source_deactivate(ws); + spin_unlock_irqrestore(&ws->lock, flags); +} +EXPORT_SYMBOL_GPL(__pm_relax); + +/** + * pm_relax - Notify the PM core that processing of a wakeup event has ended. + * @dev: Device that signaled the event. + * + * Execute __pm_relax() for the @dev's wakeup source object. + */ +void pm_relax(struct device *dev) +{ + unsigned long flags; + + if (!dev) + return; + + spin_lock_irqsave(&dev->power.lock, flags); + __pm_relax(dev->power.wakeup); + spin_unlock_irqrestore(&dev->power.lock, flags); } +EXPORT_SYMBOL_GPL(pm_relax); /** * pm_wakeup_timer_fn - Delayed finalization of a wakeup event. + * @data: Address of the wakeup source object associated with the event source. * - * Decrease the counter of wakeup events being processed after it was increased - * by pm_wakeup_event(). + * Call __pm_relax() for the wakeup source whose address is stored in @data. */ static void pm_wakeup_timer_fn(unsigned long data) +{ + __pm_relax((struct wakeup_source *)data); +} + +/** + * __pm_wakeup_event - Notify the PM core of a wakeup event. + * @ws: Wakeup source object associated with the event source. + * @msec: Anticipated event processing time (in milliseconds). + * + * Notify the PM core of a wakeup event whose source is @ws that will take + * approximately @msec milliseconds to be processed by the kernel. If @ws is + * not active, activate it. If @msec is nonzero, set up the @ws' timer to + * execute pm_wakeup_timer_fn() in future. + * + * It is safe to call this function from interrupt context. + */ +void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { unsigned long flags; + unsigned long expires; - spin_lock_irqsave(&events_lock, flags); - if (events_timer_expires - && time_before_eq(events_timer_expires, jiffies)) { - events_in_progress--; - events_timer_expires = 0; + if (!ws) + return; + + spin_lock_irqsave(&ws->lock, flags); + + ws->event_count++; + if (!ws->active) + wakeup_source_activate(ws); + + if (!msec) { + wakeup_source_deactivate(ws); + goto unlock; } - spin_unlock_irqrestore(&events_lock, flags); + + expires = jiffies + msecs_to_jiffies(msec); + if (!expires) + expires = 1; + + if (time_after(expires, ws->timer_expires)) { + mod_timer(&ws->timer, expires); + ws->timer_expires = expires; + } + + unlock: + spin_unlock_irqrestore(&ws->lock, flags); } +EXPORT_SYMBOL_GPL(__pm_wakeup_event); + /** * pm_wakeup_event - Notify the PM core of a wakeup event. * @dev: Device the wakeup event is related to. * @msec: Anticipated event processing time (in milliseconds). * - * Notify the PM core of a wakeup event (signaled by @dev) that will take - * approximately @msec milliseconds to be processed by the kernel. Increment - * the counter of registered wakeup events and (if @msec is nonzero) set up - * the wakeup events timer to execute pm_wakeup_timer_fn() in future (if the - * timer has not been set up already, increment the counter of wakeup events - * being processed). If @dev is not NULL, the counter of wakeup events related - * to @dev is incremented too. - * - * It is safe to call this function from interrupt context. + * Call __pm_wakeup_event() for the @dev's wakeup source object. */ void pm_wakeup_event(struct device *dev, unsigned int msec) { unsigned long flags; - spin_lock_irqsave(&events_lock, flags); - event_count++; - if (dev) - dev->power.wakeup_count++; - - if (msec) { - unsigned long expires; + if (!dev) + return; - expires = jiffies + msecs_to_jiffies(msec); - if (!expires) - expires = 1; + spin_lock_irqsave(&dev->power.lock, flags); + __pm_wakeup_event(dev->power.wakeup, msec); + spin_unlock_irqrestore(&dev->power.lock, flags); +} +EXPORT_SYMBOL_GPL(pm_wakeup_event); - if (!events_timer_expires - || time_after(expires, events_timer_expires)) { - if (!events_timer_expires) - events_in_progress++; +/** + * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources. + */ +static void pm_wakeup_update_hit_counts(void) +{ + unsigned long flags; + struct wakeup_source *ws; - mod_timer(&events_timer, expires); - events_timer_expires = expires; - } + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + spin_lock_irqsave(&ws->lock, flags); + if (ws->active) + ws->hit_count++; + spin_unlock_irqrestore(&ws->lock, flags); } - spin_unlock_irqrestore(&events_lock, flags); + rcu_read_unlock(); } /** @@ -184,10 +554,13 @@ bool pm_check_wakeup_events(void) spin_lock_irqsave(&events_lock, flags); if (events_check_enabled) { - ret = (event_count == saved_event_count) && !events_in_progress; + ret = ((unsigned int)atomic_read(&event_count) == saved_count) + && !atomic_read(&events_in_progress); events_check_enabled = ret; } spin_unlock_irqrestore(&events_lock, flags); + if (!ret) + pm_wakeup_update_hit_counts(); return ret; } @@ -202,24 +575,20 @@ bool pm_check_wakeup_events(void) * drop down to zero has been interrupted by a signal (and the current number * of wakeup events being processed is still nonzero). Otherwise return true. */ -bool pm_get_wakeup_count(unsigned long *count) +bool pm_get_wakeup_count(unsigned int *count) { bool ret; - spin_lock_irq(&events_lock); if (capable(CAP_SYS_ADMIN)) events_check_enabled = false; - while (events_in_progress && !signal_pending(current)) { - spin_unlock_irq(&events_lock); - - schedule_timeout_interruptible(msecs_to_jiffies(100)); - - spin_lock_irq(&events_lock); + while (atomic_read(&events_in_progress) && !signal_pending(current)) { + pm_wakeup_update_hit_counts(); + schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); } - *count = event_count; - ret = !events_in_progress; - spin_unlock_irq(&events_lock); + + ret = !atomic_read(&events_in_progress); + *count = atomic_read(&event_count); return ret; } @@ -232,16 +601,19 @@ bool pm_get_wakeup_count(unsigned long *count) * old number of registered wakeup events to be used by pm_check_wakeup_events() * and return true. Otherwise return false. */ -bool pm_save_wakeup_count(unsigned long count) +bool pm_save_wakeup_count(unsigned int count) { bool ret = false; spin_lock_irq(&events_lock); - if (count == event_count && !events_in_progress) { - saved_event_count = count; + if (count == (unsigned int)atomic_read(&event_count) + && !atomic_read(&events_in_progress)) { + saved_count = count; events_check_enabled = true; ret = true; } spin_unlock_irq(&events_lock); + if (!ret) + pm_wakeup_update_hit_counts(); return ret; } diff --git a/include/linux/pm.h b/include/linux/pm.h index 52e8c55ff314..a84118911ced 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -448,23 +448,24 @@ enum rpm_request { RPM_REQ_RESUME, }; +struct wakeup_source; + struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; - unsigned int should_wakeup:1; unsigned async_suspend:1; enum dpm_state status; /* Owned by the PM core */ + spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; struct completion completion; - unsigned long wakeup_count; + struct wakeup_source *wakeup; #endif #ifdef CONFIG_PM_RUNTIME struct timer_list suspend_timer; unsigned long timer_expires; struct work_struct work; wait_queue_head_t wait_queue; - spinlock_t lock; atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; @@ -559,11 +560,6 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); } while (0) extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); - -/* drivers/base/power/wakeup.c */ -extern void pm_wakeup_event(struct device *dev, unsigned int msec); -extern void pm_stay_awake(struct device *dev); -extern void pm_relax(void); #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -577,10 +573,6 @@ static inline int dpm_suspend_start(pm_message_t state) #define suspend_report_result(fn, ret) do {} while (0) static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} - -static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} -static inline void pm_stay_awake(struct device *dev) {} -static inline void pm_relax(void) {} #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 76aca48722ae..9cff00dd6b63 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -2,6 +2,7 @@ * pm_wakeup.h - Power management wakeup interface * * Copyright (C) 2008 Alan Stern + * Copyright (C) 2010 Rafael J. Wysocki, Novell Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,19 +28,77 @@ #include -#ifdef CONFIG_PM - -/* Changes to device_may_wakeup take effect on the next pm state change. +/** + * struct wakeup_source - Representation of wakeup sources * - * By default, most devices should leave wakeup disabled. The exceptions - * are devices that everyone expects to be wakeup sources: keyboards, - * power buttons, possibly network interfaces, etc. + * @total_time: Total time this wakeup source has been active. + * @max_time: Maximum time this wakeup source has been continuously active. + * @last_time: Monotonic clock when the wakeup source's was activated last time. + * @event_count: Number of signaled wakeup events. + * @active_count: Number of times the wakeup sorce was activated. + * @relax_count: Number of times the wakeup sorce was deactivated. + * @hit_count: Number of times the wakeup sorce might abort system suspend. + * @active: Status of the wakeup source. */ -static inline void device_init_wakeup(struct device *dev, bool val) +struct wakeup_source { + char *name; + struct list_head entry; + spinlock_t lock; + struct timer_list timer; + unsigned long timer_expires; + ktime_t total_time; + ktime_t max_time; + ktime_t last_time; + unsigned long event_count; + unsigned long active_count; + unsigned long relax_count; + unsigned long hit_count; + unsigned int active:1; +}; + +#ifdef CONFIG_PM_SLEEP + +/* + * Changes to device_may_wakeup take effect on the next pm state change. + */ + +static inline void device_set_wakeup_capable(struct device *dev, bool capable) +{ + dev->power.can_wakeup = capable; +} + +static inline bool device_can_wakeup(struct device *dev) +{ + return dev->power.can_wakeup; +} + + + +static inline bool device_may_wakeup(struct device *dev) { - dev->power.can_wakeup = dev->power.should_wakeup = val; + return dev->power.can_wakeup && !!dev->power.wakeup; } +/* drivers/base/power/wakeup.c */ +extern struct wakeup_source *wakeup_source_create(const char *name); +extern void wakeup_source_destroy(struct wakeup_source *ws); +extern void wakeup_source_add(struct wakeup_source *ws); +extern void wakeup_source_remove(struct wakeup_source *ws); +extern struct wakeup_source *wakeup_source_register(const char *name); +extern void wakeup_source_unregister(struct wakeup_source *ws); +extern int device_wakeup_enable(struct device *dev); +extern int device_wakeup_disable(struct device *dev); +extern int device_init_wakeup(struct device *dev, bool val); +extern int device_set_wakeup_enable(struct device *dev, bool enable); +extern void __pm_stay_awake(struct wakeup_source *ws); +extern void pm_stay_awake(struct device *dev); +extern void __pm_relax(struct wakeup_source *ws); +extern void pm_relax(struct device *dev); +extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec); +extern void pm_wakeup_event(struct device *dev, unsigned int msec); + +#else /* !CONFIG_PM_SLEEP */ + static inline void device_set_wakeup_capable(struct device *dev, bool capable) { dev->power.can_wakeup = capable; @@ -50,43 +109,63 @@ static inline bool device_can_wakeup(struct device *dev) return dev->power.can_wakeup; } -static inline void device_set_wakeup_enable(struct device *dev, bool enable) +static inline bool device_may_wakeup(struct device *dev) { - dev->power.should_wakeup = enable; + return false; } -static inline bool device_may_wakeup(struct device *dev) +static inline struct wakeup_source *wakeup_source_create(const char *name) { - return dev->power.can_wakeup && dev->power.should_wakeup; + return NULL; } -#else /* !CONFIG_PM */ +static inline void wakeup_source_destroy(struct wakeup_source *ws) {} + +static inline void wakeup_source_add(struct wakeup_source *ws) {} -/* For some reason the following routines work even without CONFIG_PM */ -static inline void device_init_wakeup(struct device *dev, bool val) +static inline void wakeup_source_remove(struct wakeup_source *ws) {} + +static inline struct wakeup_source *wakeup_source_register(const char *name) { - dev->power.can_wakeup = val; + return NULL; } -static inline void device_set_wakeup_capable(struct device *dev, bool capable) +static inline void wakeup_source_unregister(struct wakeup_source *ws) {} + +static inline int device_wakeup_enable(struct device *dev) { - dev->power.can_wakeup = capable; + return -EINVAL; } -static inline bool device_can_wakeup(struct device *dev) +static inline int device_wakeup_disable(struct device *dev) { - return dev->power.can_wakeup; + return 0; } -static inline void device_set_wakeup_enable(struct device *dev, bool enable) +static inline int device_init_wakeup(struct device *dev, bool val) { + dev->power.can_wakeup = val; + return val ? -EINVAL : 0; } -static inline bool device_may_wakeup(struct device *dev) + +static inline int device_set_wakeup_enable(struct device *dev, bool enable) { - return false; + return -EINVAL; } -#endif /* !CONFIG_PM */ +static inline void __pm_stay_awake(struct wakeup_source *ws) {} + +static inline void pm_stay_awake(struct device *dev) {} + +static inline void __pm_relax(struct wakeup_source *ws) {} + +static inline void pm_relax(struct device *dev) {} + +static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {} + +static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} + +#endif /* !CONFIG_PM_SLEEP */ #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 4af270ec2204..6b1712c51102 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -293,8 +293,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_check_wakeup_events(void); -extern bool pm_get_wakeup_count(unsigned long *count); -extern bool pm_save_wakeup_count(unsigned long count); +extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_save_wakeup_count(unsigned int count); #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) diff --git a/kernel/power/main.c b/kernel/power/main.c index f06ad6eff37a..6b12a0cf4d9f 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -237,18 +237,18 @@ static ssize_t wakeup_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - unsigned long val; + unsigned int val; - return pm_get_wakeup_count(&val) ? sprintf(buf, "%lu\n", val) : -EINTR; + return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { - unsigned long val; + unsigned int val; - if (sscanf(buf, "%lu", &val) == 1) { + if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) return n; } -- cgit v1.2.3 From 098dff738abbeaea15fc95c4f4fdaee1e9bbea75 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Sep 2010 22:10:57 +0200 Subject: PM: Fix potential issue with failing asynchronous suspend There is a potential issue with the asynchronous suspend code that a device driver suspending asynchronously may not notice that it should back off. There are two failing scenarions, (1) when the driver is waiting for a driver suspending synchronously to complete and that second driver returns error code, in which case async_error won't be set and the waiting driver will continue suspending and (2) after the driver has called device_pm_wait_for_dev() and the waited for driver returns error code, in which case the caller of device_pm_wait_for_dev() will not know that there was an error and will continue suspending. To fix this issue make __device_suspend() set async_error, so async_suspend() doesn't need to set it any more, and make device_pm_wait_for_dev() return async_error, so that its callers can check whether or not they should continue suspending. No more changes are necessary, since device_pm_wait_for_dev() is not used by any drivers' suspend routines. Reported-by: Colin Cross Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- drivers/base/power/main.c | 15 +++++++++------ include/linux/pm.h | 7 +++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 7ae6fe414c38..31b526661ec4 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -51,6 +51,8 @@ static pm_message_t pm_transition; */ static bool transition_started; +static int async_error; + /** * device_pm_init - Initialize the PM-related part of a device object. * @dev: Device object being initialized. @@ -602,6 +604,7 @@ static void dpm_resume(pm_message_t state) INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); pm_transition = state; + async_error = 0; list_for_each_entry(dev, &dpm_list, power.entry) { if (dev->power.status < DPM_OFF) @@ -831,8 +834,6 @@ static int legacy_suspend(struct device *dev, pm_message_t state, return error; } -static int async_error; - /** * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. @@ -887,6 +888,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) device_unlock(dev); complete_all(&dev->power.completion); + if (error) + async_error = error; + return error; } @@ -896,10 +900,8 @@ static void async_suspend(void *data, async_cookie_t cookie) int error; error = __device_suspend(dev, pm_transition, true); - if (error) { + if (error) pm_dev_err(dev, pm_transition, " async", error); - async_error = error; - } put_device(dev); } @@ -1087,8 +1089,9 @@ EXPORT_SYMBOL_GPL(__suspend_report_result); * @dev: Device to wait for. * @subordinate: Device that needs to wait for @dev. */ -void device_pm_wait_for_dev(struct device *subordinate, struct device *dev) +int device_pm_wait_for_dev(struct device *subordinate, struct device *dev) { dpm_wait(dev, subordinate->power.async_suspend); + return async_error; } EXPORT_SYMBOL_GPL(device_pm_wait_for_dev); diff --git a/include/linux/pm.h b/include/linux/pm.h index a84118911ced..1abfe84f447d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -559,7 +559,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); __suspend_report_result(__func__, fn, ret); \ } while (0) -extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); +extern int device_pm_wait_for_dev(struct device *sub, struct device *dev); #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -572,7 +572,10 @@ static inline int dpm_suspend_start(pm_message_t state) #define suspend_report_result(fn, ret) do {} while (0) -static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} +static inline int device_pm_wait_for_dev(struct device *a, struct device *b) +{ + return 0; +} #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ -- cgit v1.2.3 From 69d44ffbd772bede8c2a6d182e6e14f94826520b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:34:22 +0200 Subject: sysfs: Add sysfs_merge_group() and sysfs_unmerge_group() This patch (as1420) adds sysfs_merge_group() and sysfs_unmerge_group() functions, allowing drivers easily to add and remove sets of attributes to a pre-existing attribute group directory. Signed-off-by: Alan Stern Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- fs/sysfs/group.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 15 +++++++++++++ 2 files changed, 74 insertions(+) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 23c1e598792a..442f34ff1af8 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -148,6 +148,65 @@ void sysfs_remove_group(struct kobject * kobj, sysfs_put(sd); } +/** + * sysfs_merge_group - merge files into a pre-existing attribute group. + * @kobj: The kobject containing the group. + * @grp: The files to create and the attribute group they belong to. + * + * This function returns an error if the group doesn't exist or any of the + * files already exist in that group, in which case none of the new files + * are created. + */ +int sysfs_merge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ + struct sysfs_dirent *dir_sd; + int error = 0; + struct attribute *const *attr; + int i; + + if (grp) + dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); + else + dir_sd = sysfs_get(kobj->sd); + if (!dir_sd) + return -ENOENT; + + for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) + error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); + if (error) { + while (--i >= 0) + sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name); + } + sysfs_put(dir_sd); + + return error; +} +EXPORT_SYMBOL_GPL(sysfs_merge_group); + +/** + * sysfs_unmerge_group - remove files from a pre-existing attribute group. + * @kobj: The kobject containing the group. + * @grp: The files to remove and the attribute group they belong to. + */ +void sysfs_unmerge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ + struct sysfs_dirent *dir_sd; + struct attribute *const *attr; + + if (grp) + dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); + else + dir_sd = sysfs_get(kobj->sd); + if (dir_sd) { + for (attr = grp->attrs; *attr; ++attr) + sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + sysfs_put(dir_sd); + } +} +EXPORT_SYMBOL_GPL(sysfs_unmerge_group); + EXPORT_SYMBOL_GPL(sysfs_create_group); EXPORT_SYMBOL_GPL(sysfs_update_group); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 96eb576d82fd..30b881555fa5 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -164,6 +164,10 @@ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_remove_file_from_group(struct kobject *kobj, const struct attribute *attr, const char *group); +int sysfs_merge_group(struct kobject *kobj, + const struct attribute_group *grp); +void sysfs_unmerge_group(struct kobject *kobj, + const struct attribute_group *grp); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); void sysfs_notify_dirent(struct sysfs_dirent *sd); @@ -302,6 +306,17 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj, { } +static inline int sysfs_merge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ + return 0; +} + +static inline void sysfs_unmerge_group(struct kobject *kobj, + const struct attribute_group *grp) +{ +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { -- cgit v1.2.3 From 4769373ca2c8d0b999749a070c48fd8648888831 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:34:46 +0200 Subject: PM / Runtime: Move code in drivers/base/power/runtime.c This patch (as1421) moves the PM runtime accounting subroutines up to the beginning of runtime.c, taking them out of the middle of the functions that do the actual work. No operational changes. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 77 ++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index e9520ad2d716..ec08f1ae63f1 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -14,6 +14,44 @@ static int __pm_runtime_resume(struct device *dev, bool from_wq); static int __pm_request_idle(struct device *dev); static int __pm_request_resume(struct device *dev); +/** + * update_pm_runtime_accounting - Update the time accounting of power states + * @dev: Device to update the accounting for + * + * In order to be able to have time accounting of the various power states + * (as used by programs such as PowerTOP to show the effectiveness of runtime + * PM), we need to track the time spent in each state. + * update_pm_runtime_accounting must be called each time before the + * runtime_status field is updated, to account the time in the old state + * correctly. + */ +void update_pm_runtime_accounting(struct device *dev) +{ + unsigned long now = jiffies; + int delta; + + delta = now - dev->power.accounting_timestamp; + + if (delta < 0) + delta = 0; + + dev->power.accounting_timestamp = now; + + if (dev->power.disable_depth > 0) + return; + + if (dev->power.runtime_status == RPM_SUSPENDED) + dev->power.suspended_jiffies += delta; + else + dev->power.active_jiffies += delta; +} + +static void __update_runtime_status(struct device *dev, enum rpm_status status) +{ + update_pm_runtime_accounting(dev); + dev->power.runtime_status = status; +} + /** * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. * @dev: Device to handle. @@ -123,45 +161,6 @@ int pm_runtime_idle(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_idle); - -/** - * update_pm_runtime_accounting - Update the time accounting of power states - * @dev: Device to update the accounting for - * - * In order to be able to have time accounting of the various power states - * (as used by programs such as PowerTOP to show the effectiveness of runtime - * PM), we need to track the time spent in each state. - * update_pm_runtime_accounting must be called each time before the - * runtime_status field is updated, to account the time in the old state - * correctly. - */ -void update_pm_runtime_accounting(struct device *dev) -{ - unsigned long now = jiffies; - int delta; - - delta = now - dev->power.accounting_timestamp; - - if (delta < 0) - delta = 0; - - dev->power.accounting_timestamp = now; - - if (dev->power.disable_depth > 0) - return; - - if (dev->power.runtime_status == RPM_SUSPENDED) - dev->power.suspended_jiffies += delta; - else - dev->power.active_jiffies += delta; -} - -static void __update_runtime_status(struct device *dev, enum rpm_status status) -{ - update_pm_runtime_accounting(dev); - dev->power.runtime_status = status; -} - /** * __pm_runtime_suspend - Carry out run-time suspend of given device. * @dev: Device to suspend. -- cgit v1.2.3 From 3f9af0513ae5b1f185302c2d0ba656640926d970 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:34:54 +0200 Subject: PM / Runtime: Replace boolean arguments with bitflags The "from_wq" argument in __pm_runtime_suspend() and __pm_runtime_resume() supposedly indicates whether or not the function was called by the PM workqueue thread, but in fact it isn't always used this way. It really indicates whether or not the function should return early if the requested operation is already in progress. Along with this badly-named boolean argument, later patches in this series will add several other boolean arguments to these functions and others. Therefore this patch (as1422) begins the conversion process by replacing from_wq with a bitflag argument. The same bitflags are also used in __pm_runtime_get() and __pm_runtime_put(), where they indicate whether or not the operation should be asynchronous. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 75 +++++++++++++++++++++++--------------------- include/linux/pm_runtime.h | 23 +++++++++----- 2 files changed, 54 insertions(+), 44 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index ec08f1ae63f1..0c1db879544b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -10,7 +10,7 @@ #include #include -static int __pm_runtime_resume(struct device *dev, bool from_wq); +static int __pm_runtime_resume(struct device *dev, int rpmflags); static int __pm_request_idle(struct device *dev); static int __pm_request_resume(struct device *dev); @@ -164,24 +164,24 @@ EXPORT_SYMBOL_GPL(pm_runtime_idle); /** * __pm_runtime_suspend - Carry out run-time suspend of given device. * @dev: Device to suspend. - * @from_wq: If set, the function has been called via pm_wq. + * @rpmflags: Flag bits. * * Check if the device can be suspended and run the ->runtime_suspend() callback - * provided by its bus type. If another suspend has been started earlier, wait - * for it to finish. If an idle notification or suspend request is pending or + * provided by its bus type. If another suspend has been started earlier, + * either return immediately or wait for it to finish, depending on the + * RPM_NOWAIT flag. If an idle notification or suspend request is pending or * scheduled, cancel it. * * This function must be called under dev->power.lock with interrupts disabled. */ -int __pm_runtime_suspend(struct device *dev, bool from_wq) +static int __pm_runtime_suspend(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { struct device *parent = NULL; bool notify = false; int retval = 0; - dev_dbg(dev, "__pm_runtime_suspend()%s!\n", - from_wq ? " from workqueue" : ""); + dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags); repeat: if (dev->power.runtime_error) { @@ -213,7 +213,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq) if (dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); - if (from_wq) { + if (rpmflags & RPM_NOWAIT) { retval = -EINPROGRESS; goto out; } @@ -286,7 +286,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq) wake_up_all(&dev->power.wait_queue); if (dev->power.deferred_resume) { - __pm_runtime_resume(dev, false); + __pm_runtime_resume(dev, 0); retval = -EAGAIN; goto out; } @@ -303,7 +303,7 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq) } out: - dev_dbg(dev, "__pm_runtime_suspend() returns %d!\n", retval); + dev_dbg(dev, "%s returns %d\n", __func__, retval); return retval; } @@ -317,7 +317,7 @@ int pm_runtime_suspend(struct device *dev) int retval; spin_lock_irq(&dev->power.lock); - retval = __pm_runtime_suspend(dev, false); + retval = __pm_runtime_suspend(dev, 0); spin_unlock_irq(&dev->power.lock); return retval; @@ -327,24 +327,25 @@ EXPORT_SYMBOL_GPL(pm_runtime_suspend); /** * __pm_runtime_resume - Carry out run-time resume of given device. * @dev: Device to resume. - * @from_wq: If set, the function has been called via pm_wq. + * @rpmflags: Flag bits. * * Check if the device can be woken up and run the ->runtime_resume() callback - * provided by its bus type. If another resume has been started earlier, wait - * for it to finish. If there's a suspend running in parallel with this - * function, wait for it to finish and resume the device. Cancel any scheduled - * or pending requests. + * provided by its bus type. If another resume has been started earlier, + * either return imediately or wait for it to finish, depending on the + * RPM_NOWAIT flag. If there's a suspend running in parallel with this + * function, either tell the other process to resume after suspending + * (deferred_resume) or wait for it to finish, depending on the RPM_NOWAIT + * flag. Cancel any scheduled or pending requests. * * This function must be called under dev->power.lock with interrupts disabled. */ -int __pm_runtime_resume(struct device *dev, bool from_wq) +static int __pm_runtime_resume(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { struct device *parent = NULL; int retval = 0; - dev_dbg(dev, "__pm_runtime_resume()%s!\n", - from_wq ? " from workqueue" : ""); + dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags); repeat: if (dev->power.runtime_error) { @@ -365,7 +366,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) || dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); - if (from_wq) { + if (rpmflags & RPM_NOWAIT) { if (dev->power.runtime_status == RPM_SUSPENDING) dev->power.deferred_resume = true; retval = -EINPROGRESS; @@ -407,7 +408,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) */ if (!parent->power.disable_depth && !parent->power.ignore_children) { - __pm_runtime_resume(parent, false); + __pm_runtime_resume(parent, 0); if (parent->power.runtime_status != RPM_ACTIVE) retval = -EBUSY; } @@ -470,7 +471,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq) spin_lock_irq(&dev->power.lock); } - dev_dbg(dev, "__pm_runtime_resume() returns %d!\n", retval); + dev_dbg(dev, "%s returns %d\n", __func__, retval); return retval; } @@ -484,7 +485,7 @@ int pm_runtime_resume(struct device *dev) int retval; spin_lock_irq(&dev->power.lock); - retval = __pm_runtime_resume(dev, false); + retval = __pm_runtime_resume(dev, 0); spin_unlock_irq(&dev->power.lock); return retval; @@ -519,10 +520,10 @@ static void pm_runtime_work(struct work_struct *work) __pm_runtime_idle(dev); break; case RPM_REQ_SUSPEND: - __pm_runtime_suspend(dev, true); + __pm_runtime_suspend(dev, RPM_NOWAIT); break; case RPM_REQ_RESUME: - __pm_runtime_resume(dev, true); + __pm_runtime_resume(dev, RPM_NOWAIT); break; } @@ -782,17 +783,18 @@ EXPORT_SYMBOL_GPL(pm_request_resume); /** * __pm_runtime_get - Reference count a device and wake it up, if necessary. * @dev: Device to handle. - * @sync: If set and the device is suspended, resume it synchronously. + * @rpmflags: Flag bits. * * Increment the usage count of the device and resume it or submit a resume - * request for it, depending on the value of @sync. + * request for it, depending on the RPM_ASYNC flag bit. */ -int __pm_runtime_get(struct device *dev, bool sync) +int __pm_runtime_get(struct device *dev, int rpmflags) { int retval; atomic_inc(&dev->power.usage_count); - retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev); + retval = (rpmflags & RPM_ASYNC) ? + pm_request_resume(dev) : pm_runtime_resume(dev); return retval; } @@ -801,18 +803,19 @@ EXPORT_SYMBOL_GPL(__pm_runtime_get); /** * __pm_runtime_put - Decrement the device's usage counter and notify its bus. * @dev: Device to handle. - * @sync: If the device's bus type is to be notified, do that synchronously. + * @rpmflags: Flag bits. * * Decrement the usage count of the device and if it reaches zero, carry out a * synchronous idle notification or submit an idle notification request for it, - * depending on the value of @sync. + * depending on the RPM_ASYNC flag bit. */ -int __pm_runtime_put(struct device *dev, bool sync) +int __pm_runtime_put(struct device *dev, int rpmflags) { int retval = 0; if (atomic_dec_and_test(&dev->power.usage_count)) - retval = sync ? pm_runtime_idle(dev) : pm_request_idle(dev); + retval = (rpmflags & RPM_ASYNC) ? + pm_request_idle(dev) : pm_runtime_idle(dev); return retval; } @@ -967,7 +970,7 @@ int pm_runtime_barrier(struct device *dev) if (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME) { - __pm_runtime_resume(dev, false); + __pm_runtime_resume(dev, 0); retval = 1; } @@ -1016,7 +1019,7 @@ void __pm_runtime_disable(struct device *dev, bool check_resume) */ pm_runtime_get_noresume(dev); - __pm_runtime_resume(dev, false); + __pm_runtime_resume(dev, 0); pm_runtime_put_noidle(dev); } @@ -1064,7 +1067,7 @@ void pm_runtime_forbid(struct device *dev) dev->power.runtime_auto = false; atomic_inc(&dev->power.usage_count); - __pm_runtime_resume(dev, false); + __pm_runtime_resume(dev, 0); out: spin_unlock_irq(&dev->power.lock); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 6e81888c6222..c030cac59aac 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -12,6 +12,11 @@ #include #include +/* Runtime PM flag argument bits */ +#define RPM_ASYNC 0x01 /* Request is asynchronous */ +#define RPM_NOWAIT 0x02 /* Don't wait for concurrent + state change */ + #ifdef CONFIG_PM_RUNTIME extern struct workqueue_struct *pm_wq; @@ -22,8 +27,8 @@ extern int pm_runtime_resume(struct device *dev); extern int pm_request_idle(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int pm_request_resume(struct device *dev); -extern int __pm_runtime_get(struct device *dev, bool sync); -extern int __pm_runtime_put(struct device *dev, bool sync); +extern int __pm_runtime_get(struct device *dev, int rpmflags); +extern int __pm_runtime_put(struct device *dev, int rpmflags); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); @@ -81,8 +86,10 @@ static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) return -ENOSYS; } static inline int pm_request_resume(struct device *dev) { return 0; } -static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; } -static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; } +static inline int __pm_runtime_get(struct device *dev, int rpmflags) + { return 1; } +static inline int __pm_runtime_put(struct device *dev, int rpmflags) + { return 0; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } @@ -107,22 +114,22 @@ static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline int pm_runtime_get(struct device *dev) { - return __pm_runtime_get(dev, false); + return __pm_runtime_get(dev, RPM_ASYNC); } static inline int pm_runtime_get_sync(struct device *dev) { - return __pm_runtime_get(dev, true); + return __pm_runtime_get(dev, 0); } static inline int pm_runtime_put(struct device *dev) { - return __pm_runtime_put(dev, false); + return __pm_runtime_put(dev, RPM_ASYNC); } static inline int pm_runtime_put_sync(struct device *dev) { - return __pm_runtime_put(dev, true); + return __pm_runtime_put(dev, 0); } static inline int pm_runtime_set_active(struct device *dev) -- cgit v1.2.3 From 1bfee5bc86fdaecc912e06080583eddab7263df2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:35:00 +0200 Subject: PM / Runtime: Merge synchronous and async runtime routines This patch (as1423) merges the asynchronous routines __pm_request_idle(), __pm_request_suspend(), and __pm_request_resume() with their synchronous counterparts. The RPM_ASYNC bitflag argument serves to indicate what sort of operation to perform. In the course of performing this merger, it became apparent that the various functions don't all behave consistenly with regard to error reporting and cancellation of outstanding requests. A new routine, rpm_check_suspend_allowed(), was written to centralize much of the testing, and the other functions were revised to follow a simple algorithm: If the operation is disallowed because of the device's settings or current state, return an error. Cancel pending or scheduled requests of lower priority. Schedule, queue, or perform the desired operation. A few special cases and exceptions are noted in comments. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 375 ++++++++++++++++--------------------------- 1 file changed, 140 insertions(+), 235 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 0c1db879544b..d7b5d84c235c 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -2,6 +2,7 @@ * drivers/base/power/runtime.c - Helper functions for device run-time PM * * Copyright (c) 2009 Rafael J. Wysocki , Novell Inc. + * Copyright (C) 2010 Alan Stern * * This file is released under the GPLv2. */ @@ -11,8 +12,6 @@ #include static int __pm_runtime_resume(struct device *dev, int rpmflags); -static int __pm_request_idle(struct device *dev); -static int __pm_request_resume(struct device *dev); /** * update_pm_runtime_accounting - Update the time accounting of power states @@ -79,40 +78,84 @@ static void pm_runtime_cancel_pending(struct device *dev) } /** - * __pm_runtime_idle - Notify device bus type if the device can be suspended. - * @dev: Device to notify the bus type about. - * - * This function must be called under dev->power.lock with interrupts disabled. + * rpm_check_suspend_allowed - Test whether a device may be suspended. + * @dev: Device to test. */ -static int __pm_runtime_idle(struct device *dev) - __releases(&dev->power.lock) __acquires(&dev->power.lock) +static int rpm_check_suspend_allowed(struct device *dev) { int retval = 0; if (dev->power.runtime_error) retval = -EINVAL; - else if (dev->power.idle_notification) - retval = -EINPROGRESS; else if (atomic_read(&dev->power.usage_count) > 0 - || dev->power.disable_depth > 0 - || dev->power.runtime_status != RPM_ACTIVE) + || dev->power.disable_depth > 0) retval = -EAGAIN; else if (!pm_children_suspended(dev)) retval = -EBUSY; + + /* Pending resume requests take precedence over suspends. */ + else if ((dev->power.deferred_resume + && dev->power.status == RPM_SUSPENDING) + || (dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME)) + retval = -EAGAIN; + else if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + + return retval; +} + + +/** + * __pm_runtime_idle - Notify device bus type if the device can be suspended. + * @dev: Device to notify the bus type about. + * @rpmflags: Flag bits. + * + * Check if the device's run-time PM status allows it to be suspended. If + * another idle notification has been started earlier, return immediately. If + * the RPM_ASYNC flag is set then queue an idle-notification request; otherwise + * run the ->runtime_idle() callback directly. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_runtime_idle(struct device *dev, int rpmflags) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + int retval; + + retval = rpm_check_suspend_allowed(dev); + if (retval < 0) + ; /* Conditions are wrong. */ + + /* Idle notifications are allowed only in the RPM_ACTIVE state. */ + else if (dev->power.runtime_status != RPM_ACTIVE) + retval = -EAGAIN; + + /* + * Any pending request other than an idle notification takes + * precedence over us, except that the timer may be running. + */ + else if (dev->power.request_pending && + dev->power.request > RPM_REQ_IDLE) + retval = -EAGAIN; + + /* Act as though RPM_NOWAIT is always set. */ + else if (dev->power.idle_notification) + retval = -EINPROGRESS; if (retval) goto out; - if (dev->power.request_pending) { - /* - * If an idle notification request is pending, cancel it. Any - * other pending request takes precedence over us. - */ - if (dev->power.request == RPM_REQ_IDLE) { - dev->power.request = RPM_REQ_NONE; - } else if (dev->power.request != RPM_REQ_NONE) { - retval = -EAGAIN; - goto out; + /* Pending requests need to be canceled. */ + dev->power.request = RPM_REQ_NONE; + + /* Carry out an asynchronous or a synchronous idle notification. */ + if (rpmflags & RPM_ASYNC) { + dev->power.request = RPM_REQ_IDLE; + if (!dev->power.request_pending) { + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); } + goto out; } dev->power.idle_notification = true; @@ -154,7 +197,7 @@ int pm_runtime_idle(struct device *dev) int retval; spin_lock_irq(&dev->power.lock); - retval = __pm_runtime_idle(dev); + retval = __pm_runtime_idle(dev, 0); spin_unlock_irq(&dev->power.lock); return retval; @@ -166,11 +209,14 @@ EXPORT_SYMBOL_GPL(pm_runtime_idle); * @dev: Device to suspend. * @rpmflags: Flag bits. * - * Check if the device can be suspended and run the ->runtime_suspend() callback - * provided by its bus type. If another suspend has been started earlier, - * either return immediately or wait for it to finish, depending on the - * RPM_NOWAIT flag. If an idle notification or suspend request is pending or - * scheduled, cancel it. + * Check if the device's run-time PM status allows it to be suspended. If + * another suspend has been started earlier, either return immediately or wait + * for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC flags. Cancel a + * pending idle notification. If the RPM_ASYNC flag is set then queue a + * suspend request; otherwise run the ->runtime_suspend() callback directly. + * If a deferred resume was requested while the callback was running then carry + * it out; otherwise send an idle notification for the device (if the suspend + * failed) or for its parent (if the suspend succeeded). * * This function must be called under dev->power.lock with interrupts disabled. */ @@ -179,41 +225,30 @@ static int __pm_runtime_suspend(struct device *dev, int rpmflags) { struct device *parent = NULL; bool notify = false; - int retval = 0; + int retval; dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags); repeat: - if (dev->power.runtime_error) { - retval = -EINVAL; - goto out; - } + retval = rpm_check_suspend_allowed(dev); - /* Pending resume requests take precedence over us. */ - if (dev->power.request_pending - && dev->power.request == RPM_REQ_RESUME) { + if (retval < 0) + ; /* Conditions are wrong. */ + + /* Synchronous suspends are not allowed in the RPM_RESUMING state. */ + else if (dev->power.runtime_status == RPM_RESUMING && + !(rpmflags & RPM_ASYNC)) retval = -EAGAIN; + if (retval) goto out; - } /* Other scheduled or pending requests need to be canceled. */ pm_runtime_cancel_pending(dev); - if (dev->power.runtime_status == RPM_SUSPENDED) - retval = 1; - else if (dev->power.runtime_status == RPM_RESUMING - || dev->power.disable_depth > 0 - || atomic_read(&dev->power.usage_count) > 0) - retval = -EAGAIN; - else if (!pm_children_suspended(dev)) - retval = -EBUSY; - if (retval) - goto out; - if (dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); - if (rpmflags & RPM_NOWAIT) { + if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) { retval = -EINPROGRESS; goto out; } @@ -235,6 +270,16 @@ static int __pm_runtime_suspend(struct device *dev, int rpmflags) goto repeat; } + /* Carry out an asynchronous or a synchronous suspend. */ + if (rpmflags & RPM_ASYNC) { + dev->power.request = RPM_REQ_SUSPEND; + if (!dev->power.request_pending) { + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + } + goto out; + } + __update_runtime_status(dev, RPM_SUSPENDING); dev->power.deferred_resume = false; @@ -267,6 +312,7 @@ static int __pm_runtime_suspend(struct device *dev, int rpmflags) if (retval) { __update_runtime_status(dev, RPM_ACTIVE); + dev->power.deferred_resume = 0; if (retval == -EAGAIN || retval == -EBUSY) { if (dev->power.timer_expires == 0) notify = true; @@ -292,7 +338,7 @@ static int __pm_runtime_suspend(struct device *dev, int rpmflags) } if (notify) - __pm_runtime_idle(dev); + __pm_runtime_idle(dev, 0); if (parent && !parent->power.ignore_children) { spin_unlock_irq(&dev->power.lock); @@ -329,13 +375,15 @@ EXPORT_SYMBOL_GPL(pm_runtime_suspend); * @dev: Device to resume. * @rpmflags: Flag bits. * - * Check if the device can be woken up and run the ->runtime_resume() callback - * provided by its bus type. If another resume has been started earlier, - * either return imediately or wait for it to finish, depending on the - * RPM_NOWAIT flag. If there's a suspend running in parallel with this - * function, either tell the other process to resume after suspending - * (deferred_resume) or wait for it to finish, depending on the RPM_NOWAIT - * flag. Cancel any scheduled or pending requests. + * Check if the device's run-time PM status allows it to be resumed. Cancel + * any scheduled or pending requests. If another resume has been started + * earlier, either return imediately or wait for it to finish, depending on the + * RPM_NOWAIT and RPM_ASYNC flags. Similarly, if there's a suspend running in + * parallel with this function, either tell the other process to resume after + * suspending (deferred_resume) or wait for it to finish. If the RPM_ASYNC + * flag is set then queue a resume request; otherwise run the + * ->runtime_resume() callback directly. Queue an idle notification for the + * device if the resume succeeded. * * This function must be called under dev->power.lock with interrupts disabled. */ @@ -348,28 +396,30 @@ static int __pm_runtime_resume(struct device *dev, int rpmflags) dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags); repeat: - if (dev->power.runtime_error) { + if (dev->power.runtime_error) retval = -EINVAL; + else if (dev->power.disable_depth > 0) + retval = -EAGAIN; + if (retval) goto out; - } + /* Other scheduled or pending requests need to be canceled. */ pm_runtime_cancel_pending(dev); - if (dev->power.runtime_status == RPM_ACTIVE) + if (dev->power.runtime_status == RPM_ACTIVE) { retval = 1; - else if (dev->power.disable_depth > 0) - retval = -EAGAIN; - if (retval) goto out; + } if (dev->power.runtime_status == RPM_RESUMING || dev->power.runtime_status == RPM_SUSPENDING) { DEFINE_WAIT(wait); - if (rpmflags & RPM_NOWAIT) { + if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) { if (dev->power.runtime_status == RPM_SUSPENDING) dev->power.deferred_resume = true; - retval = -EINPROGRESS; + else + retval = -EINPROGRESS; goto out; } @@ -391,6 +441,17 @@ static int __pm_runtime_resume(struct device *dev, int rpmflags) goto repeat; } + /* Carry out an asynchronous or a synchronous resume. */ + if (rpmflags & RPM_ASYNC) { + dev->power.request = RPM_REQ_RESUME; + if (!dev->power.request_pending) { + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + } + retval = 0; + goto out; + } + if (!parent && dev->parent) { /* * Increment the parent's resume counter and resume it if @@ -460,7 +521,7 @@ static int __pm_runtime_resume(struct device *dev, int rpmflags) wake_up_all(&dev->power.wait_queue); if (!retval) - __pm_request_idle(dev); + __pm_runtime_idle(dev, RPM_ASYNC); out: if (parent) { @@ -517,7 +578,7 @@ static void pm_runtime_work(struct work_struct *work) case RPM_REQ_NONE: break; case RPM_REQ_IDLE: - __pm_runtime_idle(dev); + __pm_runtime_idle(dev, RPM_NOWAIT); break; case RPM_REQ_SUSPEND: __pm_runtime_suspend(dev, RPM_NOWAIT); @@ -531,47 +592,6 @@ static void pm_runtime_work(struct work_struct *work) spin_unlock_irq(&dev->power.lock); } -/** - * __pm_request_idle - Submit an idle notification request for given device. - * @dev: Device to handle. - * - * Check if the device's run-time PM status is correct for suspending the device - * and queue up a request to run __pm_runtime_idle() for it. - * - * This function must be called under dev->power.lock with interrupts disabled. - */ -static int __pm_request_idle(struct device *dev) -{ - int retval = 0; - - if (dev->power.runtime_error) - retval = -EINVAL; - else if (atomic_read(&dev->power.usage_count) > 0 - || dev->power.disable_depth > 0 - || dev->power.runtime_status == RPM_SUSPENDED - || dev->power.runtime_status == RPM_SUSPENDING) - retval = -EAGAIN; - else if (!pm_children_suspended(dev)) - retval = -EBUSY; - if (retval) - return retval; - - if (dev->power.request_pending) { - /* Any requests other then RPM_REQ_IDLE take precedence. */ - if (dev->power.request == RPM_REQ_NONE) - dev->power.request = RPM_REQ_IDLE; - else if (dev->power.request != RPM_REQ_IDLE) - retval = -EAGAIN; - return retval; - } - - dev->power.request = RPM_REQ_IDLE; - dev->power.request_pending = true; - queue_work(pm_wq, &dev->power.work); - - return retval; -} - /** * pm_request_idle - Submit an idle notification request for given device. * @dev: Device to handle. @@ -582,67 +602,18 @@ int pm_request_idle(struct device *dev) int retval; spin_lock_irqsave(&dev->power.lock, flags); - retval = __pm_request_idle(dev); + retval = __pm_runtime_idle(dev, RPM_ASYNC); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(pm_request_idle); -/** - * __pm_request_suspend - Submit a suspend request for given device. - * @dev: Device to suspend. - * - * This function must be called under dev->power.lock with interrupts disabled. - */ -static int __pm_request_suspend(struct device *dev) -{ - int retval = 0; - - if (dev->power.runtime_error) - return -EINVAL; - - if (dev->power.runtime_status == RPM_SUSPENDED) - retval = 1; - else if (atomic_read(&dev->power.usage_count) > 0 - || dev->power.disable_depth > 0) - retval = -EAGAIN; - else if (dev->power.runtime_status == RPM_SUSPENDING) - retval = -EINPROGRESS; - else if (!pm_children_suspended(dev)) - retval = -EBUSY; - if (retval < 0) - return retval; - - pm_runtime_deactivate_timer(dev); - - if (dev->power.request_pending) { - /* - * Pending resume requests take precedence over us, but we can - * overtake any other pending request. - */ - if (dev->power.request == RPM_REQ_RESUME) - retval = -EAGAIN; - else if (dev->power.request != RPM_REQ_SUSPEND) - dev->power.request = retval ? - RPM_REQ_NONE : RPM_REQ_SUSPEND; - return retval; - } else if (retval) { - return retval; - } - - dev->power.request = RPM_REQ_SUSPEND; - dev->power.request_pending = true; - queue_work(pm_wq, &dev->power.work); - - return 0; -} - /** * pm_suspend_timer_fn - Timer function for pm_schedule_suspend(). * @data: Device pointer passed by pm_schedule_suspend(). * - * Check if the time is right and execute __pm_request_suspend() in that case. + * Check if the time is right and queue a suspend request. */ static void pm_suspend_timer_fn(unsigned long data) { @@ -656,7 +627,7 @@ static void pm_suspend_timer_fn(unsigned long data) /* If 'expire' is after 'jiffies' we've been called too early. */ if (expires > 0 && !time_after(expires, jiffies)) { dev->power.timer_expires = 0; - __pm_request_suspend(dev); + __pm_runtime_suspend(dev, RPM_ASYNC); } spin_unlock_irqrestore(&dev->power.lock, flags); @@ -670,47 +641,24 @@ static void pm_suspend_timer_fn(unsigned long data) int pm_schedule_suspend(struct device *dev, unsigned int delay) { unsigned long flags; - int retval = 0; + int retval; spin_lock_irqsave(&dev->power.lock, flags); - if (dev->power.runtime_error) { - retval = -EINVAL; - goto out; - } - if (!delay) { - retval = __pm_request_suspend(dev); + retval = __pm_runtime_suspend(dev, RPM_ASYNC); goto out; } - pm_runtime_deactivate_timer(dev); - - if (dev->power.request_pending) { - /* - * Pending resume requests take precedence over us, but any - * other pending requests have to be canceled. - */ - if (dev->power.request == RPM_REQ_RESUME) { - retval = -EAGAIN; - goto out; - } - dev->power.request = RPM_REQ_NONE; - } - - if (dev->power.runtime_status == RPM_SUSPENDED) - retval = 1; - else if (atomic_read(&dev->power.usage_count) > 0 - || dev->power.disable_depth > 0) - retval = -EAGAIN; - else if (!pm_children_suspended(dev)) - retval = -EBUSY; + retval = rpm_check_suspend_allowed(dev); if (retval) goto out; + /* Other scheduled or pending requests need to be canceled. */ + pm_runtime_cancel_pending(dev); + dev->power.timer_expires = jiffies + msecs_to_jiffies(delay); - if (!dev->power.timer_expires) - dev->power.timer_expires = 1; + dev->power.timer_expires += !dev->power.timer_expires; mod_timer(&dev->power.suspend_timer, dev->power.timer_expires); out: @@ -720,49 +668,6 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) } EXPORT_SYMBOL_GPL(pm_schedule_suspend); -/** - * pm_request_resume - Submit a resume request for given device. - * @dev: Device to resume. - * - * This function must be called under dev->power.lock with interrupts disabled. - */ -static int __pm_request_resume(struct device *dev) -{ - int retval = 0; - - if (dev->power.runtime_error) - return -EINVAL; - - if (dev->power.runtime_status == RPM_ACTIVE) - retval = 1; - else if (dev->power.runtime_status == RPM_RESUMING) - retval = -EINPROGRESS; - else if (dev->power.disable_depth > 0) - retval = -EAGAIN; - if (retval < 0) - return retval; - - pm_runtime_deactivate_timer(dev); - - if (dev->power.runtime_status == RPM_SUSPENDING) { - dev->power.deferred_resume = true; - return retval; - } - if (dev->power.request_pending) { - /* If non-resume request is pending, we can overtake it. */ - dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME; - return retval; - } - if (retval) - return retval; - - dev->power.request = RPM_REQ_RESUME; - dev->power.request_pending = true; - queue_work(pm_wq, &dev->power.work); - - return retval; -} - /** * pm_request_resume - Submit a resume request for given device. * @dev: Device to resume. @@ -773,7 +678,7 @@ int pm_request_resume(struct device *dev) int retval; spin_lock_irqsave(&dev->power.lock, flags); - retval = __pm_request_resume(dev); + retval = __pm_runtime_resume(dev, RPM_ASYNC); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; @@ -1088,7 +993,7 @@ void pm_runtime_allow(struct device *dev) dev->power.runtime_auto = true; if (atomic_dec_and_test(&dev->power.usage_count)) - __pm_runtime_idle(dev); + __pm_runtime_idle(dev, 0); out: spin_unlock_irq(&dev->power.lock); -- cgit v1.2.3 From 140a6c945211ee911dec776fafa52e03a7d7bb9a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:35:07 +0200 Subject: PM / Runtime: Combine runtime PM entry points This patch (as1424) combines the various public entry points for the runtime PM routines into three simple functions: one for idle, one for suspend, and one for resume. A new bitflag specifies whether or not to increment or decrement the usage_count field. The new entry points are named __pm_runtime_idle, __pm_runtime_suspend, and __pm_runtime_resume, to reflect that they are trampolines. Simultaneously, the corresponding internal routines are renamed to rpm_idle, rpm_suspend, and rpm_resume. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 174 ++++++++++++++++--------------------------- include/linux/pm_runtime.h | 66 +++++++++++----- 2 files changed, 110 insertions(+), 130 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index d7b5d84c235c..ed227b7c1bb5 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -11,7 +11,7 @@ #include #include -static int __pm_runtime_resume(struct device *dev, int rpmflags); +static int rpm_resume(struct device *dev, int rpmflags); /** * update_pm_runtime_accounting - Update the time accounting of power states @@ -107,7 +107,7 @@ static int rpm_check_suspend_allowed(struct device *dev) /** - * __pm_runtime_idle - Notify device bus type if the device can be suspended. + * rpm_idle - Notify device bus type if the device can be suspended. * @dev: Device to notify the bus type about. * @rpmflags: Flag bits. * @@ -118,7 +118,7 @@ static int rpm_check_suspend_allowed(struct device *dev) * * This function must be called under dev->power.lock with interrupts disabled. */ -static int __pm_runtime_idle(struct device *dev, int rpmflags) +static int rpm_idle(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { int retval; @@ -189,23 +189,7 @@ static int __pm_runtime_idle(struct device *dev, int rpmflags) } /** - * pm_runtime_idle - Notify device bus type if the device can be suspended. - * @dev: Device to notify the bus type about. - */ -int pm_runtime_idle(struct device *dev) -{ - int retval; - - spin_lock_irq(&dev->power.lock); - retval = __pm_runtime_idle(dev, 0); - spin_unlock_irq(&dev->power.lock); - - return retval; -} -EXPORT_SYMBOL_GPL(pm_runtime_idle); - -/** - * __pm_runtime_suspend - Carry out run-time suspend of given device. + * rpm_suspend - Carry out run-time suspend of given device. * @dev: Device to suspend. * @rpmflags: Flag bits. * @@ -220,7 +204,7 @@ EXPORT_SYMBOL_GPL(pm_runtime_idle); * * This function must be called under dev->power.lock with interrupts disabled. */ -static int __pm_runtime_suspend(struct device *dev, int rpmflags) +static int rpm_suspend(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { struct device *parent = NULL; @@ -332,13 +316,13 @@ static int __pm_runtime_suspend(struct device *dev, int rpmflags) wake_up_all(&dev->power.wait_queue); if (dev->power.deferred_resume) { - __pm_runtime_resume(dev, 0); + rpm_resume(dev, 0); retval = -EAGAIN; goto out; } if (notify) - __pm_runtime_idle(dev, 0); + rpm_idle(dev, 0); if (parent && !parent->power.ignore_children) { spin_unlock_irq(&dev->power.lock); @@ -355,23 +339,7 @@ static int __pm_runtime_suspend(struct device *dev, int rpmflags) } /** - * pm_runtime_suspend - Carry out run-time suspend of given device. - * @dev: Device to suspend. - */ -int pm_runtime_suspend(struct device *dev) -{ - int retval; - - spin_lock_irq(&dev->power.lock); - retval = __pm_runtime_suspend(dev, 0); - spin_unlock_irq(&dev->power.lock); - - return retval; -} -EXPORT_SYMBOL_GPL(pm_runtime_suspend); - -/** - * __pm_runtime_resume - Carry out run-time resume of given device. + * rpm_resume - Carry out run-time resume of given device. * @dev: Device to resume. * @rpmflags: Flag bits. * @@ -387,7 +355,7 @@ EXPORT_SYMBOL_GPL(pm_runtime_suspend); * * This function must be called under dev->power.lock with interrupts disabled. */ -static int __pm_runtime_resume(struct device *dev, int rpmflags) +static int rpm_resume(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { struct device *parent = NULL; @@ -469,7 +437,7 @@ static int __pm_runtime_resume(struct device *dev, int rpmflags) */ if (!parent->power.disable_depth && !parent->power.ignore_children) { - __pm_runtime_resume(parent, 0); + rpm_resume(parent, 0); if (parent->power.runtime_status != RPM_ACTIVE) retval = -EBUSY; } @@ -521,7 +489,7 @@ static int __pm_runtime_resume(struct device *dev, int rpmflags) wake_up_all(&dev->power.wait_queue); if (!retval) - __pm_runtime_idle(dev, RPM_ASYNC); + rpm_idle(dev, RPM_ASYNC); out: if (parent) { @@ -537,22 +505,6 @@ static int __pm_runtime_resume(struct device *dev, int rpmflags) return retval; } -/** - * pm_runtime_resume - Carry out run-time resume of given device. - * @dev: Device to suspend. - */ -int pm_runtime_resume(struct device *dev) -{ - int retval; - - spin_lock_irq(&dev->power.lock); - retval = __pm_runtime_resume(dev, 0); - spin_unlock_irq(&dev->power.lock); - - return retval; -} -EXPORT_SYMBOL_GPL(pm_runtime_resume); - /** * pm_runtime_work - Universal run-time PM work function. * @work: Work structure used for scheduling the execution of this function. @@ -578,13 +530,13 @@ static void pm_runtime_work(struct work_struct *work) case RPM_REQ_NONE: break; case RPM_REQ_IDLE: - __pm_runtime_idle(dev, RPM_NOWAIT); + rpm_idle(dev, RPM_NOWAIT); break; case RPM_REQ_SUSPEND: - __pm_runtime_suspend(dev, RPM_NOWAIT); + rpm_suspend(dev, RPM_NOWAIT); break; case RPM_REQ_RESUME: - __pm_runtime_resume(dev, RPM_NOWAIT); + rpm_resume(dev, RPM_NOWAIT); break; } @@ -592,23 +544,6 @@ static void pm_runtime_work(struct work_struct *work) spin_unlock_irq(&dev->power.lock); } -/** - * pm_request_idle - Submit an idle notification request for given device. - * @dev: Device to handle. - */ -int pm_request_idle(struct device *dev) -{ - unsigned long flags; - int retval; - - spin_lock_irqsave(&dev->power.lock, flags); - retval = __pm_runtime_idle(dev, RPM_ASYNC); - spin_unlock_irqrestore(&dev->power.lock, flags); - - return retval; -} -EXPORT_SYMBOL_GPL(pm_request_idle); - /** * pm_suspend_timer_fn - Timer function for pm_schedule_suspend(). * @data: Device pointer passed by pm_schedule_suspend(). @@ -627,7 +562,7 @@ static void pm_suspend_timer_fn(unsigned long data) /* If 'expire' is after 'jiffies' we've been called too early. */ if (expires > 0 && !time_after(expires, jiffies)) { dev->power.timer_expires = 0; - __pm_runtime_suspend(dev, RPM_ASYNC); + rpm_suspend(dev, RPM_ASYNC); } spin_unlock_irqrestore(&dev->power.lock, flags); @@ -646,7 +581,7 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) spin_lock_irqsave(&dev->power.lock, flags); if (!delay) { - retval = __pm_runtime_suspend(dev, RPM_ASYNC); + retval = rpm_suspend(dev, RPM_ASYNC); goto out; } @@ -669,62 +604,81 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) EXPORT_SYMBOL_GPL(pm_schedule_suspend); /** - * pm_request_resume - Submit a resume request for given device. - * @dev: Device to resume. + * __pm_runtime_idle - Entry point for run-time idle operations. + * @dev: Device to send idle notification for. + * @rpmflags: Flag bits. + * + * If the RPM_GET_PUT flag is set, decrement the device's usage count and + * return immediately if it is larger than zero. Then carry out an idle + * notification, either synchronous or asynchronous. + * + * This routine may be called in atomic context if the RPM_ASYNC flag is set. */ -int pm_request_resume(struct device *dev) +int __pm_runtime_idle(struct device *dev, int rpmflags) { unsigned long flags; int retval; + if (rpmflags & RPM_GET_PUT) { + if (!atomic_dec_and_test(&dev->power.usage_count)) + return 0; + } + spin_lock_irqsave(&dev->power.lock, flags); - retval = __pm_runtime_resume(dev, RPM_ASYNC); + retval = rpm_idle(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } -EXPORT_SYMBOL_GPL(pm_request_resume); +EXPORT_SYMBOL_GPL(__pm_runtime_idle); /** - * __pm_runtime_get - Reference count a device and wake it up, if necessary. - * @dev: Device to handle. + * __pm_runtime_suspend - Entry point for run-time put/suspend operations. + * @dev: Device to suspend. * @rpmflags: Flag bits. * - * Increment the usage count of the device and resume it or submit a resume - * request for it, depending on the RPM_ASYNC flag bit. + * Carry out a suspend, either synchronous or asynchronous. + * + * This routine may be called in atomic context if the RPM_ASYNC flag is set. */ -int __pm_runtime_get(struct device *dev, int rpmflags) +int __pm_runtime_suspend(struct device *dev, int rpmflags) { + unsigned long flags; int retval; - atomic_inc(&dev->power.usage_count); - retval = (rpmflags & RPM_ASYNC) ? - pm_request_resume(dev) : pm_runtime_resume(dev); + spin_lock_irqsave(&dev->power.lock, flags); + retval = rpm_suspend(dev, rpmflags); + spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } -EXPORT_SYMBOL_GPL(__pm_runtime_get); +EXPORT_SYMBOL_GPL(__pm_runtime_suspend); /** - * __pm_runtime_put - Decrement the device's usage counter and notify its bus. - * @dev: Device to handle. + * __pm_runtime_resume - Entry point for run-time resume operations. + * @dev: Device to resume. * @rpmflags: Flag bits. * - * Decrement the usage count of the device and if it reaches zero, carry out a - * synchronous idle notification or submit an idle notification request for it, - * depending on the RPM_ASYNC flag bit. + * If the RPM_GET_PUT flag is set, increment the device's usage count. Then + * carry out a resume, either synchronous or asynchronous. + * + * This routine may be called in atomic context if the RPM_ASYNC flag is set. */ -int __pm_runtime_put(struct device *dev, int rpmflags) +int __pm_runtime_resume(struct device *dev, int rpmflags) { - int retval = 0; + unsigned long flags; + int retval; - if (atomic_dec_and_test(&dev->power.usage_count)) - retval = (rpmflags & RPM_ASYNC) ? - pm_request_idle(dev) : pm_runtime_idle(dev); + if (rpmflags & RPM_GET_PUT) + atomic_inc(&dev->power.usage_count); + + spin_lock_irqsave(&dev->power.lock, flags); + retval = rpm_resume(dev, rpmflags); + spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } -EXPORT_SYMBOL_GPL(__pm_runtime_put); +EXPORT_SYMBOL_GPL(__pm_runtime_resume); /** * __pm_runtime_set_status - Set run-time PM status of a device. @@ -875,7 +829,7 @@ int pm_runtime_barrier(struct device *dev) if (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME) { - __pm_runtime_resume(dev, 0); + rpm_resume(dev, 0); retval = 1; } @@ -924,7 +878,7 @@ void __pm_runtime_disable(struct device *dev, bool check_resume) */ pm_runtime_get_noresume(dev); - __pm_runtime_resume(dev, 0); + rpm_resume(dev, 0); pm_runtime_put_noidle(dev); } @@ -972,7 +926,7 @@ void pm_runtime_forbid(struct device *dev) dev->power.runtime_auto = false; atomic_inc(&dev->power.usage_count); - __pm_runtime_resume(dev, 0); + rpm_resume(dev, 0); out: spin_unlock_irq(&dev->power.lock); @@ -993,7 +947,7 @@ void pm_runtime_allow(struct device *dev) dev->power.runtime_auto = true; if (atomic_dec_and_test(&dev->power.usage_count)) - __pm_runtime_idle(dev, 0); + rpm_idle(dev, 0); out: spin_unlock_irq(&dev->power.lock); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index c030cac59aac..5869d87fffac 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -16,19 +16,17 @@ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ +#define RPM_GET_PUT 0x04 /* Increment/decrement the + usage_count */ #ifdef CONFIG_PM_RUNTIME extern struct workqueue_struct *pm_wq; -extern int pm_runtime_idle(struct device *dev); -extern int pm_runtime_suspend(struct device *dev); -extern int pm_runtime_resume(struct device *dev); -extern int pm_request_idle(struct device *dev); +extern int __pm_runtime_idle(struct device *dev, int rpmflags); +extern int __pm_runtime_suspend(struct device *dev, int rpmflags); +extern int __pm_runtime_resume(struct device *dev, int rpmflags); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); -extern int pm_request_resume(struct device *dev); -extern int __pm_runtime_get(struct device *dev, int rpmflags); -extern int __pm_runtime_put(struct device *dev, int rpmflags); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); extern void pm_runtime_enable(struct device *dev); @@ -77,19 +75,22 @@ static inline bool pm_runtime_suspended(struct device *dev) #else /* !CONFIG_PM_RUNTIME */ -static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } -static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; } -static inline int pm_runtime_resume(struct device *dev) { return 0; } -static inline int pm_request_idle(struct device *dev) { return -ENOSYS; } +static inline int __pm_runtime_idle(struct device *dev, int rpmflags) +{ + return -ENOSYS; +} +static inline int __pm_runtime_suspend(struct device *dev, int rpmflags) +{ + return -ENOSYS; +} +static inline int __pm_runtime_resume(struct device *dev, int rpmflags) +{ + return 1; +} static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) { return -ENOSYS; } -static inline int pm_request_resume(struct device *dev) { return 0; } -static inline int __pm_runtime_get(struct device *dev, int rpmflags) - { return 1; } -static inline int __pm_runtime_put(struct device *dev, int rpmflags) - { return 0; } static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } @@ -112,24 +113,49 @@ static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } #endif /* !CONFIG_PM_RUNTIME */ +static inline int pm_runtime_idle(struct device *dev) +{ + return __pm_runtime_idle(dev, 0); +} + +static inline int pm_runtime_suspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, 0); +} + +static inline int pm_runtime_resume(struct device *dev) +{ + return __pm_runtime_resume(dev, 0); +} + +static inline int pm_request_idle(struct device *dev) +{ + return __pm_runtime_idle(dev, RPM_ASYNC); +} + +static inline int pm_request_resume(struct device *dev) +{ + return __pm_runtime_resume(dev, RPM_ASYNC); +} + static inline int pm_runtime_get(struct device *dev) { - return __pm_runtime_get(dev, RPM_ASYNC); + return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); } static inline int pm_runtime_get_sync(struct device *dev) { - return __pm_runtime_get(dev, 0); + return __pm_runtime_resume(dev, RPM_GET_PUT); } static inline int pm_runtime_put(struct device *dev) { - return __pm_runtime_put(dev, RPM_ASYNC); + return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } static inline int pm_runtime_put_sync(struct device *dev) { - return __pm_runtime_put(dev, 0); + return __pm_runtime_idle(dev, RPM_GET_PUT); } static inline int pm_runtime_set_active(struct device *dev) -- cgit v1.2.3 From 7490e44239e60293bca0c2663229050c36c660c2 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:35:15 +0200 Subject: PM / Runtime: Add no_callbacks flag Some devices, such as USB interfaces, cannot be power-managed independently of their parents, i.e., they cannot be put in low power while the parent remains at full power. This patch (as1425) creates a new "no_callbacks" flag, which tells the PM core not to invoke the runtime-PM callback routines for the such devices but instead to assume that the callbacks always succeed. In addition, the non-debugging runtime-PM sysfs attributes for the devices are removed, since they are pretty much meaningless. The advantage of this scheme comes not so much from avoiding the callbacks themselves, but rather from the fact that without the need for a process context in which to run the callbacks, more work can be done in interrupt context. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 37 +++++++++++++++++++++++++ drivers/base/power/power.h | 1 + drivers/base/power/runtime.c | 54 +++++++++++++++++++++++++++++++++++- drivers/base/power/sysfs.c | 56 +++++++++++++++++++++++++++++++++----- include/linux/pm.h | 7 +++++ include/linux/pm_runtime.h | 2 ++ 6 files changed, 149 insertions(+), 8 deletions(-) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 55b859b3bc72..9ba49b21ac86 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -1,6 +1,7 @@ Run-time Power Management Framework for I/O Devices (C) 2009 Rafael J. Wysocki , Novell Inc. +(C) 2010 Alan Stern 1. Introduction @@ -230,6 +231,11 @@ defined in include/linux/pm.h: interface; it may only be modified with the help of the pm_runtime_allow() and pm_runtime_forbid() helper functions + unsigned int no_callbacks; + - indicates that the device does not use the run-time PM callbacks (see + Section 8); it may be modified only by the pm_runtime_no_callbacks() + helper function + All of the above fields are members of the 'power' member of 'struct device'. 4. Run-time PM Device Helper Functions @@ -349,6 +355,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: counter (used by the /sys/devices/.../power/control interface to effectively prevent the device from being power managed at run time) + void pm_runtime_no_callbacks(struct device *dev); + - set the power.no_callbacks flag for the device and remove the run-time + PM attributes from /sys/devices/.../power (or prevent them from being + added when the device is registered) + It is safe to execute the following helper functions from interrupt context: pm_request_idle() @@ -524,3 +535,29 @@ poweroff and run-time suspend callback, and similarly for system resume, thaw, restore, and run-time resume, can achieve this with the help of the UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its last argument to NULL). + +8. "No-Callback" Devices + +Some "devices" are only logical sub-devices of their parent and cannot be +power-managed on their own. (The prototype example is a USB interface. Entire +USB devices can go into low-power mode or send wake-up requests, but neither is +possible for individual interfaces.) The drivers for these devices have no +need of run-time PM callbacks; if the callbacks did exist, ->runtime_suspend() +and ->runtime_resume() would always return 0 without doing anything else and +->runtime_idle() would always call pm_runtime_suspend(). + +Subsystems can tell the PM core about these devices by calling +pm_runtime_no_callbacks(). This should be done after the device structure is +initialized and before it is registered (although after device registration is +also okay). The routine will set the device's power.no_callbacks flag and +prevent the non-debugging run-time PM sysfs attributes from being created. + +When power.no_callbacks is set, the PM core will not invoke the +->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks. +Instead it will assume that suspends and resumes always succeed and that idle +devices should be suspended. + +As a consequence, the PM core will never directly inform the device's subsystem +or driver about run-time power changes. Instead, the driver for the device's +parent must take responsibility for telling the device's driver when the +parent's power state changes. diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 8b2745c69e2a..698dde742587 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -60,6 +60,7 @@ static inline void device_pm_move_last(struct device *dev) {} extern int dpm_sysfs_add(struct device *); extern void dpm_sysfs_remove(struct device *); +extern void rpm_sysfs_remove(struct device *); #else /* CONFIG_PM */ diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index ed227b7c1bb5..5bd4daa93ef1 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -10,8 +10,10 @@ #include #include #include +#include "power.h" static int rpm_resume(struct device *dev, int rpmflags); +static int rpm_suspend(struct device *dev, int rpmflags); /** * update_pm_runtime_accounting - Update the time accounting of power states @@ -148,6 +150,12 @@ static int rpm_idle(struct device *dev, int rpmflags) /* Pending requests need to be canceled. */ dev->power.request = RPM_REQ_NONE; + if (dev->power.no_callbacks) { + /* Assume ->runtime_idle() callback would have suspended. */ + retval = rpm_suspend(dev, rpmflags); + goto out; + } + /* Carry out an asynchronous or a synchronous idle notification. */ if (rpmflags & RPM_ASYNC) { dev->power.request = RPM_REQ_IDLE; @@ -254,6 +262,10 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto repeat; } + dev->power.deferred_resume = false; + if (dev->power.no_callbacks) + goto no_callback; /* Assume success. */ + /* Carry out an asynchronous or a synchronous suspend. */ if (rpmflags & RPM_ASYNC) { dev->power.request = RPM_REQ_SUSPEND; @@ -265,7 +277,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) } __update_runtime_status(dev, RPM_SUSPENDING); - dev->power.deferred_resume = false; if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) { spin_unlock_irq(&dev->power.lock); @@ -305,6 +316,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) pm_runtime_cancel_pending(dev); } } else { + no_callback: __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); @@ -409,6 +421,23 @@ static int rpm_resume(struct device *dev, int rpmflags) goto repeat; } + /* + * See if we can skip waking up the parent. This is safe only if + * power.no_callbacks is set, because otherwise we don't know whether + * the resume will actually succeed. + */ + if (dev->power.no_callbacks && !parent && dev->parent) { + spin_lock(&dev->parent->power.lock); + if (dev->parent->power.disable_depth > 0 + || dev->parent->power.ignore_children + || dev->parent->power.runtime_status == RPM_ACTIVE) { + atomic_inc(&dev->parent->power.child_count); + spin_unlock(&dev->parent->power.lock); + goto no_callback; /* Assume success. */ + } + spin_unlock(&dev->parent->power.lock); + } + /* Carry out an asynchronous or a synchronous resume. */ if (rpmflags & RPM_ASYNC) { dev->power.request = RPM_REQ_RESUME; @@ -449,6 +478,9 @@ static int rpm_resume(struct device *dev, int rpmflags) goto repeat; } + if (dev->power.no_callbacks) + goto no_callback; /* Assume success. */ + __update_runtime_status(dev, RPM_RESUMING); if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) { @@ -482,6 +514,7 @@ static int rpm_resume(struct device *dev, int rpmflags) __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_cancel_pending(dev); } else { + no_callback: __update_runtime_status(dev, RPM_ACTIVE); if (parent) atomic_inc(&parent->power.child_count); @@ -954,6 +987,25 @@ void pm_runtime_allow(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_allow); +/** + * pm_runtime_no_callbacks - Ignore run-time PM callbacks for a device. + * @dev: Device to handle. + * + * Set the power.no_callbacks flag, which tells the PM core that this + * device is power-managed through its parent and has no run-time PM + * callbacks of its own. The run-time sysfs attributes will be removed. + * + */ +void pm_runtime_no_callbacks(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + dev->power.no_callbacks = 1; + spin_unlock_irq(&dev->power.lock); + if (device_is_registered(dev)) + rpm_sysfs_remove(dev); +} +EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks); + /** * pm_runtime_init - Initialize run-time PM fields in given device object. * @dev: Device object to initialize. diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 8859780817e1..b5708c47ce2d 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -81,6 +81,9 @@ static const char enabled[] = "enabled"; static const char disabled[] = "disabled"; +const char power_group_name[] = "power"; +EXPORT_SYMBOL_GPL(power_group_name); + #ifdef CONFIG_PM_RUNTIME static const char ctrl_auto[] = "auto"; static const char ctrl_on[] = "on"; @@ -390,12 +393,6 @@ static DEVICE_ATTR(async, 0644, async_show, async_store); #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute * power_attrs[] = { -#ifdef CONFIG_PM_RUNTIME - &dev_attr_control.attr, - &dev_attr_runtime_status.attr, - &dev_attr_runtime_suspended_time.attr, - &dev_attr_runtime_active_time.attr, -#endif &dev_attr_wakeup.attr, #ifdef CONFIG_PM_SLEEP &dev_attr_wakeup_count.attr, @@ -409,6 +406,7 @@ static struct attribute * power_attrs[] = { #ifdef CONFIG_PM_ADVANCED_DEBUG &dev_attr_async.attr, #ifdef CONFIG_PM_RUNTIME + &dev_attr_runtime_status.attr, &dev_attr_runtime_usage.attr, &dev_attr_runtime_active_kids.attr, &dev_attr_runtime_enabled.attr, @@ -417,10 +415,52 @@ static struct attribute * power_attrs[] = { NULL, }; static struct attribute_group pm_attr_group = { - .name = "power", + .name = power_group_name, .attrs = power_attrs, }; +#ifdef CONFIG_PM_RUNTIME + +static struct attribute *runtime_attrs[] = { +#ifndef CONFIG_PM_ADVANCED_DEBUG + &dev_attr_runtime_status.attr, +#endif + &dev_attr_control.attr, + &dev_attr_runtime_suspended_time.attr, + &dev_attr_runtime_active_time.attr, + NULL, +}; +static struct attribute_group pm_runtime_attr_group = { + .name = power_group_name, + .attrs = runtime_attrs, +}; + +int dpm_sysfs_add(struct device *dev) +{ + int rc; + + rc = sysfs_create_group(&dev->kobj, &pm_attr_group); + if (rc == 0 && !dev->power.no_callbacks) { + rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group); + if (rc) + sysfs_remove_group(&dev->kobj, &pm_attr_group); + } + return rc; +} + +void rpm_sysfs_remove(struct device *dev) +{ + sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); +} + +void dpm_sysfs_remove(struct device *dev) +{ + rpm_sysfs_remove(dev); + sysfs_remove_group(&dev->kobj, &pm_attr_group); +} + +#else /* CONFIG_PM_RUNTIME */ + int dpm_sysfs_add(struct device * dev) { return sysfs_create_group(&dev->kobj, &pm_attr_group); @@ -430,3 +470,5 @@ void dpm_sysfs_remove(struct device * dev) { sysfs_remove_group(&dev->kobj, &pm_attr_group); } + +#endif diff --git a/include/linux/pm.h b/include/linux/pm.h index 1abfe84f447d..abd81ffaba3c 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -41,6 +41,12 @@ extern void (*pm_power_off_prepare)(void); struct device; +#ifdef CONFIG_PM +extern const char power_group_name[]; /* = "power" */ +#else +#define power_group_name NULL +#endif + typedef struct pm_message { int event; } pm_message_t; @@ -475,6 +481,7 @@ struct dev_pm_info { unsigned int deferred_resume:1; unsigned int run_wake:1; unsigned int runtime_auto:1; + unsigned int no_callbacks:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 5869d87fffac..8ca52f7c357e 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -36,6 +36,7 @@ extern void pm_runtime_forbid(struct device *dev); extern int pm_generic_runtime_idle(struct device *dev); extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); +extern void pm_runtime_no_callbacks(struct device *dev); static inline bool pm_children_suspended(struct device *dev) { @@ -110,6 +111,7 @@ static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline int pm_generic_runtime_idle(struct device *dev) { return 0; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } +static inline void pm_runtime_no_callbacks(struct device *dev) {} #endif /* !CONFIG_PM_RUNTIME */ -- cgit v1.2.3 From 15bcb91d7e607d8a2e060f01f7784a7454668da4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Sat, 25 Sep 2010 23:35:21 +0200 Subject: PM / Runtime: Implement autosuspend support This patch (as1427) implements the "autosuspend" facility for runtime PM. A few new fields are added to the dev_pm_info structure and several new PM helper functions are defined, for telling the PM core whether or not a device uses autosuspend, for setting the autosuspend delay, and for marking periods of device activity. Drivers that do not want to use autosuspend can continue using the same helper functions as before; their behavior will not change. In addition, drivers supporting autosuspend can also call the old helper functions to get the old behavior. The details are all explained in Documentation/power/runtime_pm.txt and Documentation/ABI/testing/sysfs-devices-power. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-devices-power | 18 +++ Documentation/power/runtime_pm.txt | 190 +++++++++++++++++++++++++- drivers/base/power/runtime.c | 186 ++++++++++++++++++++++++- drivers/base/power/sysfs.c | 40 ++++++ include/linux/pm.h | 8 ++ include/linux/pm_runtime.h | 45 ++++++ 6 files changed, 473 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 6bb2dd3c3a71..7628cd1bc36a 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -147,3 +147,21 @@ Description: milliseconds. This attribute is read-only. If the device is not enabled to wake up the system from sleep states, this attribute is empty. + +What: /sys/devices/.../power/autosuspend_delay_ms +Date: September 2010 +Contact: Alan Stern +Description: + The /sys/devices/.../power/autosuspend_delay_ms attribute + contains the autosuspend delay value (in milliseconds). Some + drivers do not want their device to suspend as soon as it + becomes idle at run time; they want the device to remain + inactive for a certain minimum period of time first. That + period is called the autosuspend delay. Negative values will + prevent the device from being suspended at run time (similar + to writing "on" to the power/control attribute). Values >= + 1000 will cause the autosuspend timer expiration to be rounded + up to the nearest second. + + Not all drivers support this attribute. If it isn't supported, + attempts to read or write it will yield I/O errors. diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 9ba49b21ac86..489e9bacd165 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -158,7 +158,8 @@ rules: to execute it, the other callbacks will not be executed for the same device. * A request to execute ->runtime_resume() will cancel any pending or - scheduled requests to execute the other callbacks for the same device. + scheduled requests to execute the other callbacks for the same device, + except for scheduled autosuspends. 3. Run-time PM Device Fields @@ -166,7 +167,7 @@ The following device run-time PM fields are present in 'struct dev_pm_info', as defined in include/linux/pm.h: struct timer_list suspend_timer; - - timer used for scheduling (delayed) suspend request + - timer used for scheduling (delayed) suspend and autosuspend requests unsigned long timer_expires; - timer expiration time, in jiffies (if this is different from zero, the @@ -236,6 +237,23 @@ defined in include/linux/pm.h: Section 8); it may be modified only by the pm_runtime_no_callbacks() helper function + unsigned int use_autosuspend; + - indicates that the device's driver supports delayed autosuspend (see + Section 9); it may be modified only by the + pm_runtime{_dont}_use_autosuspend() helper functions + + unsigned int timer_autosuspends; + - indicates that the PM core should attempt to carry out an autosuspend + when the timer expires rather than a normal suspend + + int autosuspend_delay; + - the delay time (in milliseconds) to be used for autosuspend + + unsigned long last_busy; + - the time (in jiffies) when the pm_runtime_mark_last_busy() helper + function was last called for this device; used in calculating inactivity + periods for autosuspend + All of the above fields are members of the 'power' member of 'struct device'. 4. Run-time PM Device Helper Functions @@ -261,6 +279,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt to suspend the device again in future + int pm_runtime_autosuspend(struct device *dev); + - same as pm_runtime_suspend() except that the autosuspend delay is taken + into account; if pm_runtime_autosuspend_expiration() says the delay has + not yet expired then an autosuspend is scheduled for the appropriate time + and 0 is returned + int pm_runtime_resume(struct device *dev); - execute the subsystem-level resume callback for the device; returns 0 on success, 1 if the device's run-time PM status was already 'active' or @@ -273,6 +297,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: device (the request is represented by a work item in pm_wq); returns 0 on success or error code if the request has not been queued up + int pm_request_autosuspend(struct device *dev); + - schedule the execution of the subsystem-level suspend callback for the + device when the autosuspend delay has expired; if the delay has already + expired then the work item is queued up immediately + int pm_schedule_suspend(struct device *dev, unsigned int delay); - schedule the execution of the subsystem-level suspend callback for the device in future, where 'delay' is the time to wait before queuing up a @@ -304,12 +333,20 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: - decrement the device's usage counter int pm_runtime_put(struct device *dev); - - decrement the device's usage counter, run pm_request_idle(dev) and return - its result + - decrement the device's usage counter; if the result is 0 then run + pm_request_idle(dev) and return its result + + int pm_runtime_put_autosuspend(struct device *dev); + - decrement the device's usage counter; if the result is 0 then run + pm_request_autosuspend(dev) and return its result int pm_runtime_put_sync(struct device *dev); - - decrement the device's usage counter, run pm_runtime_idle(dev) and return - its result + - decrement the device's usage counter; if the result is 0 then run + pm_runtime_idle(dev) and return its result + + int pm_runtime_put_sync_autosuspend(struct device *dev); + - decrement the device's usage counter; if the result is 0 then run + pm_runtime_autosuspend(dev) and return its result void pm_runtime_enable(struct device *dev); - enable the run-time PM helper functions to run the device bus type's @@ -360,19 +397,46 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: PM attributes from /sys/devices/.../power (or prevent them from being added when the device is registered) + void pm_runtime_mark_last_busy(struct device *dev); + - set the power.last_busy field to the current time + + void pm_runtime_use_autosuspend(struct device *dev); + - set the power.use_autosuspend flag, enabling autosuspend delays + + void pm_runtime_dont_use_autosuspend(struct device *dev); + - clear the power.use_autosuspend flag, disabling autosuspend delays + + void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); + - set the power.autosuspend_delay value to 'delay' (expressed in + milliseconds); if 'delay' is negative then run-time suspends are + prevented + + unsigned long pm_runtime_autosuspend_expiration(struct device *dev); + - calculate the time when the current autosuspend delay period will expire, + based on power.last_busy and power.autosuspend_delay; if the delay time + is 1000 ms or larger then the expiration time is rounded up to the + nearest second; returns 0 if the delay period has already expired or + power.use_autosuspend isn't set, otherwise returns the expiration time + in jiffies + It is safe to execute the following helper functions from interrupt context: pm_request_idle() +pm_request_autosuspend() pm_schedule_suspend() pm_request_resume() pm_runtime_get_noresume() pm_runtime_get() pm_runtime_put_noidle() pm_runtime_put() +pm_runtime_put_autosuspend() +pm_runtime_enable() pm_suspend_ignore_children() pm_runtime_set_active() pm_runtime_set_suspended() -pm_runtime_enable() +pm_runtime_suspended() +pm_runtime_mark_last_busy() +pm_runtime_autosuspend_expiration() 5. Run-time PM Initialization, Device Probing and Removal @@ -561,3 +625,115 @@ As a consequence, the PM core will never directly inform the device's subsystem or driver about run-time power changes. Instead, the driver for the device's parent must take responsibility for telling the device's driver when the parent's power state changes. + +9. Autosuspend, or automatically-delayed suspends + +Changing a device's power state isn't free; it requires both time and energy. +A device should be put in a low-power state only when there's some reason to +think it will remain in that state for a substantial time. A common heuristic +says that a device which hasn't been used for a while is liable to remain +unused; following this advice, drivers should not allow devices to be suspended +at run-time until they have been inactive for some minimum period. Even when +the heuristic ends up being non-optimal, it will still prevent devices from +"bouncing" too rapidly between low-power and full-power states. + +The term "autosuspend" is an historical remnant. It doesn't mean that the +device is automatically suspended (the subsystem or driver still has to call +the appropriate PM routines); rather it means that run-time suspends will +automatically be delayed until the desired period of inactivity has elapsed. + +Inactivity is determined based on the power.last_busy field. Drivers should +call pm_runtime_mark_last_busy() to update this field after carrying out I/O, +typically just before calling pm_runtime_put_autosuspend(). The desired length +of the inactivity period is a matter of policy. Subsystems can set this length +initially by calling pm_runtime_set_autosuspend_delay(), but after device +registration the length should be controlled by user space, using the +/sys/devices/.../power/autosuspend_delay_ms attribute. + +In order to use autosuspend, subsystems or drivers must call +pm_runtime_use_autosuspend() (preferably before registering the device), and +thereafter they should use the various *_autosuspend() helper functions instead +of the non-autosuspend counterparts: + + Instead of: pm_runtime_suspend use: pm_runtime_autosuspend; + Instead of: pm_schedule_suspend use: pm_request_autosuspend; + Instead of: pm_runtime_put use: pm_runtime_put_autosuspend; + Instead of: pm_runtime_put_sync use: pm_runtime_put_sync_autosuspend. + +Drivers may also continue to use the non-autosuspend helper functions; they +will behave normally, not taking the autosuspend delay into account. +Similarly, if the power.use_autosuspend field isn't set then the autosuspend +helper functions will behave just like the non-autosuspend counterparts. + +The implementation is well suited for asynchronous use in interrupt contexts. +However such use inevitably involves races, because the PM core can't +synchronize ->runtime_suspend() callbacks with the arrival of I/O requests. +This synchronization must be handled by the driver, using its private lock. +Here is a schematic pseudo-code example: + + foo_read_or_write(struct foo_priv *foo, void *data) + { + lock(&foo->private_lock); + add_request_to_io_queue(foo, data); + if (foo->num_pending_requests++ == 0) + pm_runtime_get(&foo->dev); + if (!foo->is_suspended) + foo_process_next_request(foo); + unlock(&foo->private_lock); + } + + foo_io_completion(struct foo_priv *foo, void *req) + { + lock(&foo->private_lock); + if (--foo->num_pending_requests == 0) { + pm_runtime_mark_last_busy(&foo->dev); + pm_runtime_put_autosuspend(&foo->dev); + } else { + foo_process_next_request(foo); + } + unlock(&foo->private_lock); + /* Send req result back to the user ... */ + } + + int foo_runtime_suspend(struct device *dev) + { + struct foo_priv foo = container_of(dev, ...); + int ret = 0; + + lock(&foo->private_lock); + if (foo->num_pending_requests > 0) { + ret = -EBUSY; + } else { + /* ... suspend the device ... */ + foo->is_suspended = 1; + } + unlock(&foo->private_lock); + return ret; + } + + int foo_runtime_resume(struct device *dev) + { + struct foo_priv foo = container_of(dev, ...); + + lock(&foo->private_lock); + /* ... resume the device ... */ + foo->is_suspended = 0; + pm_runtime_mark_last_busy(&foo->dev); + if (foo->num_pending_requests > 0) + foo_process_requests(foo); + unlock(&foo->private_lock); + return 0; + } + +The important point is that after foo_io_completion() asks for an autosuspend, +the foo_runtime_suspend() callback may race with foo_read_or_write(). +Therefore foo_runtime_suspend() has to check whether there are any pending I/O +requests (while holding the private lock) before allowing the suspend to +proceed. + +In addition, the power.autosuspend_delay field can be changed by user space at +any time. If a driver cares about this, it can call +pm_runtime_autosuspend_expiration() from within the ->runtime_suspend() +callback while holding its private lock. If the function returns a nonzero +value then the delay has not yet expired and the callback should return +-EAGAIN. diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 5bd4daa93ef1..cd4e100a1362 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -9,7 +9,6 @@ #include #include -#include #include "power.h" static int rpm_resume(struct device *dev, int rpmflags); @@ -79,6 +78,53 @@ static void pm_runtime_cancel_pending(struct device *dev) dev->power.request = RPM_REQ_NONE; } +/* + * pm_runtime_autosuspend_expiration - Get a device's autosuspend-delay expiration time. + * @dev: Device to handle. + * + * Compute the autosuspend-delay expiration time based on the device's + * power.last_busy time. If the delay has already expired or is disabled + * (negative) or the power.use_autosuspend flag isn't set, return 0. + * Otherwise return the expiration time in jiffies (adjusted to be nonzero). + * + * This function may be called either with or without dev->power.lock held. + * Either way it can be racy, since power.last_busy may be updated at any time. + */ +unsigned long pm_runtime_autosuspend_expiration(struct device *dev) +{ + int autosuspend_delay; + long elapsed; + unsigned long last_busy; + unsigned long expires = 0; + + if (!dev->power.use_autosuspend) + goto out; + + autosuspend_delay = ACCESS_ONCE(dev->power.autosuspend_delay); + if (autosuspend_delay < 0) + goto out; + + last_busy = ACCESS_ONCE(dev->power.last_busy); + elapsed = jiffies - last_busy; + if (elapsed < 0) + goto out; /* jiffies has wrapped around. */ + + /* + * If the autosuspend_delay is >= 1 second, align the timer by rounding + * up to the nearest second. + */ + expires = last_busy + msecs_to_jiffies(autosuspend_delay); + if (autosuspend_delay >= 1000) + expires = round_jiffies(expires); + expires += !expires; + if (elapsed >= expires - last_busy) + expires = 0; /* Already expired. */ + + out: + return expires; +} +EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration); + /** * rpm_check_suspend_allowed - Test whether a device may be suspended. * @dev: Device to test. @@ -234,6 +280,32 @@ static int rpm_suspend(struct device *dev, int rpmflags) if (retval) goto out; + /* If the autosuspend_delay time hasn't expired yet, reschedule. */ + if ((rpmflags & RPM_AUTO) + && dev->power.runtime_status != RPM_SUSPENDING) { + unsigned long expires = pm_runtime_autosuspend_expiration(dev); + + if (expires != 0) { + /* Pending requests need to be canceled. */ + dev->power.request = RPM_REQ_NONE; + + /* + * Optimization: If the timer is already running and is + * set to expire at or before the autosuspend delay, + * avoid the overhead of resetting it. Just let it + * expire; pm_suspend_timer_fn() will take care of the + * rest. + */ + if (!(dev->power.timer_expires && time_before_eq( + dev->power.timer_expires, expires))) { + dev->power.timer_expires = expires; + mod_timer(&dev->power.suspend_timer, expires); + } + dev->power.timer_autosuspends = 1; + goto out; + } + } + /* Other scheduled or pending requests need to be canceled. */ pm_runtime_cancel_pending(dev); @@ -268,7 +340,8 @@ static int rpm_suspend(struct device *dev, int rpmflags) /* Carry out an asynchronous or a synchronous suspend. */ if (rpmflags & RPM_ASYNC) { - dev->power.request = RPM_REQ_SUSPEND; + dev->power.request = (rpmflags & RPM_AUTO) ? + RPM_REQ_AUTOSUSPEND : RPM_REQ_SUSPEND; if (!dev->power.request_pending) { dev->power.request_pending = true; queue_work(pm_wq, &dev->power.work); @@ -383,8 +456,15 @@ static int rpm_resume(struct device *dev, int rpmflags) if (retval) goto out; - /* Other scheduled or pending requests need to be canceled. */ - pm_runtime_cancel_pending(dev); + /* + * Other scheduled or pending requests need to be canceled. Small + * optimization: If an autosuspend timer is running, leave it running + * rather than cancelling it now only to restart it again in the near + * future. + */ + dev->power.request = RPM_REQ_NONE; + if (!dev->power.timer_autosuspends) + pm_runtime_deactivate_timer(dev); if (dev->power.runtime_status == RPM_ACTIVE) { retval = 1; @@ -568,6 +648,9 @@ static void pm_runtime_work(struct work_struct *work) case RPM_REQ_SUSPEND: rpm_suspend(dev, RPM_NOWAIT); break; + case RPM_REQ_AUTOSUSPEND: + rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO); + break; case RPM_REQ_RESUME: rpm_resume(dev, RPM_NOWAIT); break; @@ -595,7 +678,8 @@ static void pm_suspend_timer_fn(unsigned long data) /* If 'expire' is after 'jiffies' we've been called too early. */ if (expires > 0 && !time_after(expires, jiffies)) { dev->power.timer_expires = 0; - rpm_suspend(dev, RPM_ASYNC); + rpm_suspend(dev, dev->power.timer_autosuspends ? + (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); } spin_unlock_irqrestore(&dev->power.lock, flags); @@ -627,6 +711,7 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay) dev->power.timer_expires = jiffies + msecs_to_jiffies(delay); dev->power.timer_expires += !dev->power.timer_expires; + dev->power.timer_autosuspends = 0; mod_timer(&dev->power.suspend_timer, dev->power.timer_expires); out: @@ -670,7 +755,9 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle); * @dev: Device to suspend. * @rpmflags: Flag bits. * - * Carry out a suspend, either synchronous or asynchronous. + * If the RPM_GET_PUT flag is set, decrement the device's usage count and + * return immediately if it is larger than zero. Then carry out a suspend, + * either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set. */ @@ -679,6 +766,11 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags) unsigned long flags; int retval; + if (rpmflags & RPM_GET_PUT) { + if (!atomic_dec_and_test(&dev->power.usage_count)) + return 0; + } + spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_suspend(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); @@ -980,7 +1072,7 @@ void pm_runtime_allow(struct device *dev) dev->power.runtime_auto = true; if (atomic_dec_and_test(&dev->power.usage_count)) - rpm_idle(dev, 0); + rpm_idle(dev, RPM_AUTO); out: spin_unlock_irq(&dev->power.lock); @@ -1006,6 +1098,86 @@ void pm_runtime_no_callbacks(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks); +/** + * update_autosuspend - Handle a change to a device's autosuspend settings. + * @dev: Device to handle. + * @old_delay: The former autosuspend_delay value. + * @old_use: The former use_autosuspend value. + * + * Prevent runtime suspend if the new delay is negative and use_autosuspend is + * set; otherwise allow it. Send an idle notification if suspends are allowed. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static void update_autosuspend(struct device *dev, int old_delay, int old_use) +{ + int delay = dev->power.autosuspend_delay; + + /* Should runtime suspend be prevented now? */ + if (dev->power.use_autosuspend && delay < 0) { + + /* If it used to be allowed then prevent it. */ + if (!old_use || old_delay >= 0) { + atomic_inc(&dev->power.usage_count); + rpm_resume(dev, 0); + } + } + + /* Runtime suspend should be allowed now. */ + else { + + /* If it used to be prevented then allow it. */ + if (old_use && old_delay < 0) + atomic_dec(&dev->power.usage_count); + + /* Maybe we can autosuspend now. */ + rpm_idle(dev, RPM_AUTO); + } +} + +/** + * pm_runtime_set_autosuspend_delay - Set a device's autosuspend_delay value. + * @dev: Device to handle. + * @delay: Value of the new delay in milliseconds. + * + * Set the device's power.autosuspend_delay value. If it changes to negative + * and the power.use_autosuspend flag is set, prevent run-time suspends. If it + * changes the other way, allow run-time suspends. + */ +void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) +{ + int old_delay, old_use; + + spin_lock_irq(&dev->power.lock); + old_delay = dev->power.autosuspend_delay; + old_use = dev->power.use_autosuspend; + dev->power.autosuspend_delay = delay; + update_autosuspend(dev, old_delay, old_use); + spin_unlock_irq(&dev->power.lock); +} +EXPORT_SYMBOL_GPL(pm_runtime_set_autosuspend_delay); + +/** + * __pm_runtime_use_autosuspend - Set a device's use_autosuspend flag. + * @dev: Device to handle. + * @use: New value for use_autosuspend. + * + * Set the device's power.use_autosuspend flag, and allow or prevent run-time + * suspends as needed. + */ +void __pm_runtime_use_autosuspend(struct device *dev, bool use) +{ + int old_delay, old_use; + + spin_lock_irq(&dev->power.lock); + old_delay = dev->power.autosuspend_delay; + old_use = dev->power.use_autosuspend; + dev->power.use_autosuspend = use; + update_autosuspend(dev, old_delay, old_use); + spin_unlock_irq(&dev->power.lock); +} +EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend); + /** * pm_runtime_init - Initialize run-time PM fields in given device object. * @dev: Device object to initialize. diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index b5708c47ce2d..0b1e46bf3e56 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -75,6 +75,18 @@ * attribute is set to "enabled" by bus type code or device drivers and in * that cases it should be safe to leave the default value. * + * autosuspend_delay_ms - Report/change a device's autosuspend_delay value + * + * Some drivers don't want to carry out a runtime suspend as soon as a + * device becomes idle; they want it always to remain idle for some period + * of time before suspending it. This period is the autosuspend_delay + * value (expressed in milliseconds) and it can be controlled by the user. + * If the value is negative then the device will never be runtime + * suspended. + * + * NOTE: The autosuspend_delay_ms attribute and the autosuspend_delay + * value are used only if the driver calls pm_runtime_use_autosuspend(). + * * wakeup_count - Report the number of wakeup events related to the device */ @@ -173,6 +185,33 @@ static ssize_t rtpm_status_show(struct device *dev, } static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL); + +static ssize_t autosuspend_delay_ms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!dev->power.use_autosuspend) + return -EIO; + return sprintf(buf, "%d\n", dev->power.autosuspend_delay); +} + +static ssize_t autosuspend_delay_ms_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t n) +{ + long delay; + + if (!dev->power.use_autosuspend) + return -EIO; + + if (strict_strtol(buf, 10, &delay) != 0 || delay != (int) delay) + return -EINVAL; + + pm_runtime_set_autosuspend_delay(dev, delay); + return n; +} + +static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show, + autosuspend_delay_ms_store); + #endif static ssize_t @@ -428,6 +467,7 @@ static struct attribute *runtime_attrs[] = { &dev_attr_control.attr, &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, + &dev_attr_autosuspend_delay_ms.attr, NULL, }; static struct attribute_group pm_runtime_attr_group = { diff --git a/include/linux/pm.h b/include/linux/pm.h index abd81ffaba3c..40f3f45702ba 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -444,6 +444,9 @@ enum rpm_status { * * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback * + * RPM_REQ_AUTOSUSPEND Same as RPM_REQ_SUSPEND, but not until the device has + * been inactive for as long as power.autosuspend_delay + * * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback */ @@ -451,6 +454,7 @@ enum rpm_request { RPM_REQ_NONE = 0, RPM_REQ_IDLE, RPM_REQ_SUSPEND, + RPM_REQ_AUTOSUSPEND, RPM_REQ_RESUME, }; @@ -482,9 +486,13 @@ struct dev_pm_info { unsigned int run_wake:1; unsigned int runtime_auto:1; unsigned int no_callbacks:1; + unsigned int use_autosuspend:1; + unsigned int timer_autosuspends:1; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; + int autosuspend_delay; + unsigned long last_busy; unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 8ca52f7c357e..99ed1aa8f933 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -12,12 +12,15 @@ #include #include +#include + /* Runtime PM flag argument bits */ #define RPM_ASYNC 0x01 /* Request is asynchronous */ #define RPM_NOWAIT 0x02 /* Don't wait for concurrent state change */ #define RPM_GET_PUT 0x04 /* Increment/decrement the usage_count */ +#define RPM_AUTO 0x08 /* Use autosuspend_delay */ #ifdef CONFIG_PM_RUNTIME @@ -37,6 +40,9 @@ extern int pm_generic_runtime_idle(struct device *dev); extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); +extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); +extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); +extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); static inline bool pm_children_suspended(struct device *dev) { @@ -74,6 +80,11 @@ static inline bool pm_runtime_suspended(struct device *dev) return dev->power.runtime_status == RPM_SUSPENDED; } +static inline void pm_runtime_mark_last_busy(struct device *dev) +{ + ACCESS_ONCE(dev->power.last_busy) = jiffies; +} + #else /* !CONFIG_PM_RUNTIME */ static inline int __pm_runtime_idle(struct device *dev, int rpmflags) @@ -113,6 +124,14 @@ static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline void pm_runtime_no_callbacks(struct device *dev) {} +static inline void pm_runtime_mark_last_busy(struct device *dev) {} +static inline void __pm_runtime_use_autosuspend(struct device *dev, + bool use) {} +static inline void pm_runtime_set_autosuspend_delay(struct device *dev, + int delay) {} +static inline unsigned long pm_runtime_autosuspend_expiration( + struct device *dev) { return 0; } + #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) @@ -125,6 +144,11 @@ static inline int pm_runtime_suspend(struct device *dev) return __pm_runtime_suspend(dev, 0); } +static inline int pm_runtime_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_AUTO); +} + static inline int pm_runtime_resume(struct device *dev) { return __pm_runtime_resume(dev, 0); @@ -155,11 +179,22 @@ static inline int pm_runtime_put(struct device *dev) return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } +static inline int pm_runtime_put_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, + RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); +} + static inline int pm_runtime_put_sync(struct device *dev) { return __pm_runtime_idle(dev, RPM_GET_PUT); } +static inline int pm_runtime_put_sync_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_AUTO); +} + static inline int pm_runtime_set_active(struct device *dev) { return __pm_runtime_set_status(dev, RPM_ACTIVE); @@ -175,4 +210,14 @@ static inline void pm_runtime_disable(struct device *dev) __pm_runtime_disable(dev, true); } +static inline void pm_runtime_use_autosuspend(struct device *dev) +{ + __pm_runtime_use_autosuspend(dev, true); +} + +static inline void pm_runtime_dont_use_autosuspend(struct device *dev) +{ + __pm_runtime_use_autosuspend(dev, false); +} + #endif -- cgit v1.2.3 From d0941ead3fdd31aafff992d211bcefdbff1eaedb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Sep 2010 23:31:22 +0200 Subject: PM / Hibernate: Make some boot messages look less scary The hibernate resume code checks if there is an image to resume from on every boot and, if the kernel is built with CONFIG_PM_DEBUG set and the image is not present, it prints some scary messages suggesting there was a boot error of some sort. Apparently, some users are confused by them, so make them look less scary and adjust the other hibernate resume debug messages to match them. Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 12 ++++++------ kernel/power/swap.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 6c9c9dc48c75..657272e91d0a 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -708,7 +708,7 @@ static int software_resume(void) goto Unlock; } - pr_debug("PM: Checking image partition %s\n", resume_file); + pr_debug("PM: Checking hibernation image partition %s\n", resume_file); /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); @@ -733,10 +733,10 @@ static int software_resume(void) } Check_image: - pr_debug("PM: Resume from partition %d:%d\n", + pr_debug("PM: Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); - pr_debug("PM: Checking hibernation image.\n"); + pr_debug("PM: Looking for hibernation image.\n"); error = swsusp_check(); if (error) goto Unlock; @@ -768,14 +768,14 @@ static int software_resume(void) goto Done; } - pr_debug("PM: Reading hibernation image.\n"); + pr_debug("PM: Loading hibernation image.\n"); error = swsusp_read(&flags); swsusp_close(FMODE_READ); if (!error) hibernation_restore(flags & SF_PLATFORM_MODE); - printk(KERN_ERR "PM: Restore failed, recovering.\n"); + printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); swsusp_free(); thaw_processes(); Done: @@ -788,7 +788,7 @@ static int software_resume(void) /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&pm_mutex); - pr_debug("PM: Resume from disk failed.\n"); + pr_debug("PM: Hibernation image not present or could not be loaded.\n"); return error; close_finish: swsusp_close(FMODE_READ); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 3dc0552cddfb..2dfa87869b49 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -929,13 +929,13 @@ put: if (error) blkdev_put(hib_resume_bdev, FMODE_READ); else - pr_debug("PM: Signature found, resuming\n"); + pr_debug("PM: Image signature found, resuming\n"); } else { error = PTR_ERR(hib_resume_bdev); } if (error) - pr_debug("PM: Error %d checking image file\n", error); + pr_debug("PM: Image not found (code %d)\n", error); return error; } -- cgit v1.2.3 From 5fc62aad4e7779c2f04691e48b351d08c050c1f1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 29 Sep 2010 00:12:22 +0200 Subject: PM: runtime: add missed pm_request_autosuspend The patch "PM / Runtime: Implement autosuspend support" introduces "autosuspend" facility for runtime PM, but misses helper function of pm_request_autosuspend, so add it. Signed-off-by: Ming Lei Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 99ed1aa8f933..3ec2358f8692 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -164,6 +164,11 @@ static inline int pm_request_resume(struct device *dev) return __pm_runtime_resume(dev, RPM_ASYNC); } +static inline int pm_request_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_ASYNC | RPM_AUTO); +} + static inline int pm_runtime_get(struct device *dev) { return __pm_runtime_resume(dev, RPM_GET_PUT | RPM_ASYNC); -- cgit v1.2.3 From dbeeec5fe868f2e2e92fe94daa2c5a047240fdc4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Oct 2010 22:07:32 +0200 Subject: PM: Allow wakeup events to abort freezing of tasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is a wakeup event during the freezing of tasks, suspend or hibernation will fail anyway. Since try_to_freeze_tasks() can take up to 20 seconds to complete or fail, aborting it as soon as a wakeup event is detected improves the worst case wakeup latency. Based on a patch from Arve Hjønnevåg. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek --- include/linux/suspend.h | 2 ++ kernel/power/process.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 6b1712c51102..26697514c5ec 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -308,6 +308,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) } #define pm_notifier(fn, pri) do { (void)(fn); } while (0) + +static inline bool pm_check_wakeup_events(void) { return true; } #endif /* !CONFIG_PM_SLEEP */ extern struct mutex pm_mutex; diff --git a/kernel/power/process.c b/kernel/power/process.c index 028a99598f49..e50b4c1b2a0f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -40,6 +40,7 @@ static int try_to_freeze_tasks(bool sig_only) struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; + bool wakeup = false; do_gettimeofday(&start); @@ -78,6 +79,11 @@ static int try_to_freeze_tasks(bool sig_only) if (!todo || time_after(jiffies, end_time)) break; + if (!pm_check_wakeup_events()) { + wakeup = true; + break; + } + /* * We need to retry, but first give the freezing tasks some * time to enter the regrigerator. @@ -97,8 +103,9 @@ static int try_to_freeze_tasks(bool sig_only) * but it cleans up leftover PF_FREEZE requests. */ printk("\n"); - printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " + printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", + wakeup ? "aborted" : "failed", elapsed_csecs / 100, elapsed_csecs % 100, todo - wq_busy, wq_busy); @@ -107,7 +114,7 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); - if (freezing(p) && !freezer_should_skip(p)) + if (!wakeup && freezing(p) && !freezer_should_skip(p)) sched_show_task(p); cancel_freezing(p); task_unlock(p); -- cgit v1.2.3 From 71c63122c4609a917f14a79c32067a68909fc487 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Oct 2010 22:08:01 +0200 Subject: PM / Runtime: Reduce code duplication in core helper functions Reduce code duplication in rpm_idle(), rpm_suspend() and rpm_resume() by using local pointers to store callback addresses and moving some duplicated code into a separate function. Signed-off-by: Rafael J. Wysocki Reviewed-by: Alan Stern --- drivers/base/power/runtime.c | 122 +++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 68 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index cd4e100a1362..e957c496a1b1 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -153,7 +153,6 @@ static int rpm_check_suspend_allowed(struct device *dev) return retval; } - /** * rpm_idle - Notify device bus type if the device can be suspended. * @dev: Device to notify the bus type about. @@ -167,8 +166,8 @@ static int rpm_check_suspend_allowed(struct device *dev) * This function must be called under dev->power.lock with interrupts disabled. */ static int rpm_idle(struct device *dev, int rpmflags) - __releases(&dev->power.lock) __acquires(&dev->power.lock) { + int (*callback)(struct device *); int retval; retval = rpm_check_suspend_allowed(dev); @@ -214,23 +213,19 @@ static int rpm_idle(struct device *dev, int rpmflags) dev->power.idle_notification = true; - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) { - spin_unlock_irq(&dev->power.lock); - - dev->bus->pm->runtime_idle(dev); - - spin_lock_irq(&dev->power.lock); - } else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle) { - spin_unlock_irq(&dev->power.lock); - - dev->type->pm->runtime_idle(dev); + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) + callback = dev->bus->pm->runtime_idle; + else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle) + callback = dev->type->pm->runtime_idle; + else if (dev->class && dev->class->pm) + callback = dev->class->pm->runtime_idle; + else + callback = NULL; - spin_lock_irq(&dev->power.lock); - } else if (dev->class && dev->class->pm - && dev->class->pm->runtime_idle) { + if (callback) { spin_unlock_irq(&dev->power.lock); - dev->class->pm->runtime_idle(dev); + callback(dev); spin_lock_irq(&dev->power.lock); } @@ -242,6 +237,29 @@ static int rpm_idle(struct device *dev, int rpmflags) return retval; } +/** + * rpm_callback - Run a given runtime PM callback for a given device. + * @cb: Runtime PM callback to run. + * @dev: Device to run the callback for. + */ +static int rpm_callback(int (*cb)(struct device *), struct device *dev) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + int retval; + + if (!cb) + return -ENOSYS; + + spin_unlock_irq(&dev->power.lock); + + retval = cb(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + + return retval; +} + /** * rpm_suspend - Carry out run-time suspend of given device. * @dev: Device to suspend. @@ -261,6 +279,7 @@ static int rpm_idle(struct device *dev, int rpmflags) static int rpm_suspend(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { + int (*callback)(struct device *); struct device *parent = NULL; bool notify = false; int retval; @@ -351,33 +370,16 @@ static int rpm_suspend(struct device *dev, int rpmflags) __update_runtime_status(dev, RPM_SUSPENDING); - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) { - spin_unlock_irq(&dev->power.lock); - - retval = dev->bus->pm->runtime_suspend(dev); - - spin_lock_irq(&dev->power.lock); - dev->power.runtime_error = retval; - } else if (dev->type && dev->type->pm - && dev->type->pm->runtime_suspend) { - spin_unlock_irq(&dev->power.lock); - - retval = dev->type->pm->runtime_suspend(dev); - - spin_lock_irq(&dev->power.lock); - dev->power.runtime_error = retval; - } else if (dev->class && dev->class->pm - && dev->class->pm->runtime_suspend) { - spin_unlock_irq(&dev->power.lock); - - retval = dev->class->pm->runtime_suspend(dev); - - spin_lock_irq(&dev->power.lock); - dev->power.runtime_error = retval; - } else { - retval = -ENOSYS; - } + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) + callback = dev->bus->pm->runtime_suspend; + else if (dev->type && dev->type->pm && dev->type->pm->runtime_suspend) + callback = dev->type->pm->runtime_suspend; + else if (dev->class && dev->class->pm) + callback = dev->class->pm->runtime_suspend; + else + callback = NULL; + retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_ACTIVE); dev->power.deferred_resume = 0; @@ -443,6 +445,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) static int rpm_resume(struct device *dev, int rpmflags) __releases(&dev->power.lock) __acquires(&dev->power.lock) { + int (*callback)(struct device *); struct device *parent = NULL; int retval = 0; @@ -563,33 +566,16 @@ static int rpm_resume(struct device *dev, int rpmflags) __update_runtime_status(dev, RPM_RESUMING); - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) { - spin_unlock_irq(&dev->power.lock); - - retval = dev->bus->pm->runtime_resume(dev); - - spin_lock_irq(&dev->power.lock); - dev->power.runtime_error = retval; - } else if (dev->type && dev->type->pm - && dev->type->pm->runtime_resume) { - spin_unlock_irq(&dev->power.lock); - - retval = dev->type->pm->runtime_resume(dev); - - spin_lock_irq(&dev->power.lock); - dev->power.runtime_error = retval; - } else if (dev->class && dev->class->pm - && dev->class->pm->runtime_resume) { - spin_unlock_irq(&dev->power.lock); - - retval = dev->class->pm->runtime_resume(dev); - - spin_lock_irq(&dev->power.lock); - dev->power.runtime_error = retval; - } else { - retval = -ENOSYS; - } + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) + callback = dev->bus->pm->runtime_resume; + else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume) + callback = dev->type->pm->runtime_resume; + else if (dev->class && dev->class->pm) + callback = dev->class->pm->runtime_resume; + else + callback = NULL; + retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_cancel_pending(dev); -- cgit v1.2.3 From 3624eb04c24861ab296842414f9752a393e68372 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Oct 2010 22:08:12 +0200 Subject: PM / Hibernate: Modify signature used to mark swap Since we are adding compression to the kernel's hibernate code, change signature used by it to mark swap spaces, so that earlier kernels don't attempt to restore compressed images they cannot handle. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek --- kernel/power/swap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 2dfa87869b49..916eaa790399 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -29,7 +29,7 @@ #include "power.h" -#define SWSUSP_SIG "S1SUSPEND" +#define HIBERNATE_SIG "LINHIB0001" /* * The swap map is a data structure used for keeping track of each page @@ -195,7 +195,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); - memcpy(swsusp_header->sig,SWSUSP_SIG, 10); + memcpy(swsusp_header->sig, HIBERNATE_SIG, 10); swsusp_header->image = handle->first_sector; swsusp_header->flags = flags; error = hib_bio_write_page(swsusp_resume_block, @@ -916,7 +916,7 @@ int swsusp_check(void) if (error) goto put; - if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { + if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ error = hib_bio_write_page(swsusp_resume_block, -- cgit v1.2.3 From f71648d73c1650b8b4aceb3856bebbde6daa3b86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 11 Oct 2010 01:02:27 +0200 Subject: PM / Runtime: Remove idle notification after failing suspend If runtime suspend of a device fails returning -EAGAIN or -EBUSY, which means that it's safe to try to suspend it again, the PM core runs the runtime idle helper function for it. Unfortunately this may lead to problems, for example for PCI devices whose drivers don't implement the ->runtime_idle() callback, because in that case the PCI bus type's ->runtime_idle() always calls pm_runtime_suspend() for the given device. Then, if there's an automatic idle notification after the driver's ->runtime_suspend() returning -EAGAIN or -EBUSY, it will make the suspend happen again possibly causing a busy loop to appear. To avoid that, remove the idle notification after failing runtime suspend of a device altogether and let the callers of pm_runtime_suspend() repeat the operation if need be. Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern --- drivers/base/power/runtime.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index e957c496a1b1..1dd8676d7f55 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -281,7 +281,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) { int (*callback)(struct device *); struct device *parent = NULL; - bool notify = false; int retval; dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags); @@ -383,13 +382,10 @@ static int rpm_suspend(struct device *dev, int rpmflags) if (retval) { __update_runtime_status(dev, RPM_ACTIVE); dev->power.deferred_resume = 0; - if (retval == -EAGAIN || retval == -EBUSY) { - if (dev->power.timer_expires == 0) - notify = true; + if (retval == -EAGAIN || retval == -EBUSY) dev->power.runtime_error = 0; - } else { + else pm_runtime_cancel_pending(dev); - } } else { no_callback: __update_runtime_status(dev, RPM_SUSPENDED); @@ -408,9 +404,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } - if (notify) - rpm_idle(dev, 0); - if (parent && !parent->power.ignore_children) { spin_unlock_irq(&dev->power.lock); -- cgit v1.2.3 From 2ac21c6bc4249ee6d922f18dbec7266377592c32 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 11 Oct 2010 23:59:58 +0200 Subject: PM: Lock PM device list mutex in show_dev_hash() Lock the PM device list mutex using device_pm_lock() and device_pm_unlock() around the list iteration in show_dev_hash(). show_dev_hash() was reverse iterating dpm_list without first locking the mutex that the functions in drivers/base/power/main.c lock. I assume this was unintentional since there is no comment suggesting why the lock might not be necessary. Signed-off-by: James Hogan Signed-off-by: Rafael J. Wysocki --- drivers/base/power/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 0a1a2c4dbc6e..17e24e3f4422 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -188,8 +188,10 @@ static int show_file_hash(unsigned int value) static int show_dev_hash(unsigned int value) { int match = 0; - struct list_head *entry = dpm_list.prev; + struct list_head *entry; + device_pm_lock(); + entry = dpm_list.prev; while (entry != &dpm_list) { struct device * dev = to_device(entry); unsigned int hash = hash_string(DEVSEED, dev_name(dev), DEVHASH); @@ -199,6 +201,7 @@ static int show_dev_hash(unsigned int value) } entry = entry->prev; } + device_pm_unlock(); return match; } -- cgit v1.2.3 From d33ac60beaf2c7dee5cd90aba7c1eb385dd70937 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 12 Oct 2010 00:00:25 +0200 Subject: PM: Add sysfs attr for rechecking dev hash from PM trace If the device which fails to resume is part of a loadable kernel module it won't be checked at startup against the magic number stored in the RTC. Add a read-only sysfs attribute /sys/power/pm_trace_dev_match which contains a list of newline separated devices (usually just the one) which currently match the last magic number. This allows the device which is failing to resume to be found after the modules are loaded again. Signed-off-by: James Hogan Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-power | 29 +++++++++++++++++++++++++++++ Documentation/power/s2ram.txt | 7 +++++++ drivers/base/power/trace.c | 31 +++++++++++++++++++++++++++++++ include/linux/resume-trace.h | 2 ++ kernel/power/main.c | 18 ++++++++++++++++++ 5 files changed, 87 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 2875f1f74a07..194ca446ac28 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -99,9 +99,38 @@ Description: dmesg -s 1000000 | grep 'hash matches' + If you do not get any matches (or they appear to be false + positives), it is possible that the last PM event point + referred to a device created by a loadable kernel module. In + this case cat /sys/power/pm_trace_dev_match (see below) after + your system is started up and the kernel modules are loaded. + CAUTION: Using it will cause your machine's real-time (CMOS) clock to be set to a random invalid time after a resume. +What; /sys/power/pm_trace_dev_match +Date: October 2010 +Contact: James Hogan +Description: + The /sys/power/pm_trace_dev_match file contains the name of the + device associated with the last PM event point saved in the RTC + across reboots when pm_trace has been used. More precisely it + contains the list of current devices (including those + registered by loadable kernel modules since boot) which match + the device hash in the RTC at boot, with a newline after each + one. + + The advantage of this file over the hash matches printed to the + kernel log (see /sys/power/pm_trace), is that it includes + devices created after boot by loadable kernel modules. + + Due to the small hash size necessary to fit in the RTC, it is + possible that more than one device matches the hash, in which + case further investigation is required to determine which + device is causing the problem. Note that genuine RTC clock + values (such as when pm_trace has not been used), can still + match a device and output it's name here. + What: /sys/power/pm_async Date: January 2009 Contact: Rafael J. Wysocki diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt index 514b94fc931e..1bdfa0443773 100644 --- a/Documentation/power/s2ram.txt +++ b/Documentation/power/s2ram.txt @@ -49,6 +49,13 @@ machine that doesn't boot) is: device (lspci and /sys/devices/pci* is your friend), and see if you can fix it, disable it, or trace into its resume function. + If no device matches the hash (or any matches appear to be false positives), + the culprit may be a device from a loadable kernel module that is not loaded + until after the hash is checked. You can check the hash against the current + devices again after more modules are loaded using sysfs: + + cat /sys/power/pm_trace_dev_match + For example, the above happens to be the VGA device on my EVO, which I used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out that "radeonfb" simply cannot resume that device - it tries to set the diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 17e24e3f4422..9f4258df4cfd 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -207,6 +207,37 @@ static int show_dev_hash(unsigned int value) static unsigned int hash_value_early_read; +int show_trace_dev_match(char *buf, size_t size) +{ + unsigned int value = hash_value_early_read / (USERHASH * FILEHASH); + int ret = 0; + struct list_head *entry; + + /* + * It's possible that multiple devices will match the hash and we can't + * tell which is the culprit, so it's best to output them all. + */ + device_pm_lock(); + entry = dpm_list.prev; + while (size && entry != &dpm_list) { + struct device *dev = to_device(entry); + unsigned int hash = hash_string(DEVSEED, dev_name(dev), + DEVHASH); + if (hash == value) { + int len = snprintf(buf, size, "%s\n", + dev_driver_string(dev)); + if (len > size) + len = size; + buf += len; + ret += len; + size -= len; + } + entry = entry->prev; + } + device_pm_unlock(); + return ret; +} + static int early_resume_init(void) { hash_value_early_read = read_magic_time(); diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index bc8c3881c729..f31db2368782 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -3,6 +3,7 @@ #ifdef CONFIG_PM_TRACE #include +#include extern int pm_trace_enabled; @@ -14,6 +15,7 @@ static inline int pm_trace_is_enabled(void) struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(const void *tracedata, unsigned int user); +extern int show_trace_dev_match(char *buf, size_t size); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ diff --git a/kernel/power/main.c b/kernel/power/main.c index 6b12a0cf4d9f..7b5db6a8561e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -281,12 +281,30 @@ pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, } power_attr(pm_trace); + +static ssize_t pm_trace_dev_match_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return show_trace_dev_match(buf, PAGE_SIZE); +} + +static ssize_t +pm_trace_dev_match_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + return -EINVAL; +} + +power_attr(pm_trace_dev_match); + #endif /* CONFIG_PM_TRACE */ static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE &pm_trace_attr.attr, + &pm_trace_dev_match_attr.attr, #endif #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, -- cgit v1.2.3 From e1f60b292ffd61151403327aa19ff7a1871820bd Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 13 Oct 2010 00:13:10 +0200 Subject: PM: Introduce library for device-specific OPPs (v7) SoCs have a standard set of tuples consisting of frequency and voltage pairs that the device will support per voltage domain. These are called Operating Performance Points or OPPs. The actual definitions of OPP varies over silicon versions. For a specific domain, we can have a set of {frequency, voltage} pairs. As the kernel boots and more information is available, a default set of these are activated based on the precise nature of device. Further on operation, based on conditions prevailing in the system (such as temperature), some OPP availability may be temporarily controlled by the SoC frameworks. To implement an OPP, some sort of power management support is necessary hence this library depends on CONFIG_PM. Contributions include: Sanjeev Premi for the initial concept: http://patchwork.kernel.org/patch/50998/ Kevin Hilman for converting original design to device-based. Kevin Hilman and Paul Walmsey for cleaning up many of the function abstractions, improvements and data structure handling. Romit Dasgupta for using enums instead of opp pointers. Thara Gopinath, Eduardo Valentin and Vishwanath BS for fixes and cleanups. Linus Walleij for recommending this layer be made generic for usage in other architectures beyond OMAP and ARM. Mark Brown, Andrew Morton, Rafael J. Wysocki, Paul E. McKenney for valuable improvements. Discussions and comments from: http://marc.info/?l=linux-omap&m=126033945313269&w=2 http://marc.info/?l=linux-omap&m=125482970102327&w=2 http://marc.info/?t=125809247500002&r=1&w=2 http://marc.info/?l=linux-omap&m=126025973426007&w=2 http://marc.info/?t=128152609200064&r=1&w=2 http://marc.info/?t=128468723000002&r=1&w=2 incorporated. v1: http://marc.info/?t=128468723000002&r=1&w=2 Signed-off-by: Nishanth Menon Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- Documentation/power/00-INDEX | 2 + Documentation/power/opp.txt | 375 ++++++++++++++++++++++++++ drivers/base/power/Makefile | 1 + drivers/base/power/opp.c | 628 +++++++++++++++++++++++++++++++++++++++++++ include/linux/opp.h | 105 ++++++++ kernel/power/Kconfig | 14 + 6 files changed, 1125 insertions(+) create mode 100644 Documentation/power/opp.txt create mode 100644 drivers/base/power/opp.c create mode 100644 include/linux/opp.h diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX index fb742c213c9e..45e9d4a91284 100644 --- a/Documentation/power/00-INDEX +++ b/Documentation/power/00-INDEX @@ -14,6 +14,8 @@ interface.txt - Power management user interface in /sys/power notifiers.txt - Registering suspend notifiers in device drivers +opp.txt + - Operating Performance Point library pci.txt - How the PCI Subsystem Does Power Management pm_qos_interface.txt diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt new file mode 100644 index 000000000000..44d87ad3cea9 --- /dev/null +++ b/Documentation/power/opp.txt @@ -0,0 +1,375 @@ +*=============* +* OPP Library * +*=============* + +(C) 2009-2010 Nishanth Menon , Texas Instruments Incorporated + +Contents +-------- +1. Introduction +2. Initial OPP List Registration +3. OPP Search Functions +4. OPP Availability Control Functions +5. OPP Data Retrieval Functions +6. Cpufreq Table Generation +7. Data Structures + +1. Introduction +=============== +Complex SoCs of today consists of a multiple sub-modules working in conjunction. +In an operational system executing varied use cases, not all modules in the SoC +need to function at their highest performing frequency all the time. To +facilitate this, sub-modules in a SoC are grouped into domains, allowing some +domains to run at lower voltage and frequency while other domains are loaded +more. The set of discrete tuples consisting of frequency and voltage pairs that +the device will support per domain are called Operating Performance Points or +OPPs. + +OPP library provides a set of helper functions to organize and query the OPP +information. The library is located in drivers/base/power/opp.c and the header +is located in include/linux/opp.h. OPP library can be enabled by enabling +CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on +CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to +optionally boot at a certain OPP without needing cpufreq. + +Typical usage of the OPP library is as follows: +(users) -> registers a set of default OPPs -> (library) +SoC framework -> modifies on required cases certain OPPs -> OPP layer + -> queries to search/retrieve information -> + +OPP layer expects each domain to be represented by a unique device pointer. SoC +framework registers a set of initial OPPs per device with the OPP layer. This +list is expected to be an optimally small number typically around 5 per device. +This initial list contains a set of OPPs that the framework expects to be safely +enabled by default in the system. + +Note on OPP Availability: +------------------------ +As the system proceeds to operate, SoC framework may choose to make certain +OPPs available or not available on each device based on various external +factors. Example usage: Thermal management or other exceptional situations where +SoC framework might choose to disable a higher frequency OPP to safely continue +operations until that OPP could be re-enabled if possible. + +OPP library facilitates this concept in it's implementation. The following +operational functions operate only on available opps: +opp_find_freq_{ceil, floor}, opp_get_voltage, opp_get_freq, opp_get_opp_count +and opp_init_cpufreq_table + +opp_find_freq_exact is meant to be used to find the opp pointer which can then +be used for opp_enable/disable functions to make an opp available as required. + +WARNING: Users of OPP library should refresh their availability count using +get_opp_count if opp_enable/disable functions are invoked for a device, the +exact mechanism to trigger these or the notification mechanism to other +dependent subsystems such as cpufreq are left to the discretion of the SoC +specific framework which uses the OPP library. Similar care needs to be taken +care to refresh the cpufreq table in cases of these operations. + +WARNING on OPP List locking mechanism: +------------------------------------------------- +OPP library uses RCU for exclusivity. RCU allows the query functions to operate +in multiple contexts and this synchronization mechanism is optimal for a read +intensive operations on data structure as the OPP library caters to. + +To ensure that the data retrieved are sane, the users such as SoC framework +should ensure that the section of code operating on OPP queries are locked +using RCU read locks. The opp_find_freq_{exact,ceil,floor}, +opp_get_{voltage, freq, opp_count} fall into this category. + +opp_{add,enable,disable} are updaters which use mutex and implement it's own +RCU locking mechanisms. opp_init_cpufreq_table acts as an updater and uses +mutex to implment RCU updater strategy. These functions should *NOT* be called +under RCU locks and other contexts that prevent blocking functions in RCU or +mutex operations from working. + +2. Initial OPP List Registration +================================ +The SoC implementation calls opp_add function iteratively to add OPPs per +device. It is expected that the SoC framework will register the OPP entries +optimally- typical numbers range to be less than 5. The list generated by +registering the OPPs is maintained by OPP library throughout the device +operation. The SoC framework can subsequently control the availability of the +OPPs dynamically using the opp_enable / disable functions. + +opp_add - Add a new OPP for a specific domain represented by the device pointer. + The OPP is defined using the frequency and voltage. Once added, the OPP + is assumed to be available and control of it's availability can be done + with the opp_enable/disable functions. OPP library internally stores + and manages this information in the opp struct. This function may be + used by SoC framework to define a optimal list as per the demands of + SoC usage environment. + + WARNING: Do not use this function in interrupt context. + + Example: + soc_pm_init() + { + /* Do things */ + r = opp_add(mpu_dev, 1000000, 900000); + if (!r) { + pr_err("%s: unable to register mpu opp(%d)\n", r); + goto no_cpufreq; + } + /* Do cpufreq things */ + no_cpufreq: + /* Do remaining things */ + } + +3. OPP Search Functions +======================= +High level framework such as cpufreq operates on frequencies. To map the +frequency back to the corresponding OPP, OPP library provides handy functions +to search the OPP list that OPP library internally manages. These search +functions return the matching pointer representing the opp if a match is +found, else returns error. These errors are expected to be handled by standard +error checks such as IS_ERR() and appropriate actions taken by the caller. + +opp_find_freq_exact - Search for an OPP based on an *exact* frequency and + availability. This function is especially useful to enable an OPP which + is not available by default. + Example: In a case when SoC framework detects a situation where a + higher frequency could be made available, it can use this function to + find the OPP prior to call the opp_enable to actually make it available. + rcu_read_lock(); + opp = opp_find_freq_exact(dev, 1000000000, false); + rcu_read_unlock(); + /* dont operate on the pointer.. just do a sanity check.. */ + if (IS_ERR(opp)) { + pr_err("frequency not disabled!\n"); + /* trigger appropriate actions.. */ + } else { + opp_enable(dev,1000000000); + } + + NOTE: This is the only search function that operates on OPPs which are + not available. + +opp_find_freq_floor - Search for an available OPP which is *at most* the + provided frequency. This function is useful while searching for a lesser + match OR operating on OPP information in the order of decreasing + frequency. + Example: To find the highest opp for a device: + freq = ULONG_MAX; + rcu_read_lock(); + opp_find_freq_floor(dev, &freq); + rcu_read_unlock(); + +opp_find_freq_ceil - Search for an available OPP which is *at least* the + provided frequency. This function is useful while searching for a + higher match OR operating on OPP information in the order of increasing + frequency. + Example 1: To find the lowest opp for a device: + freq = 0; + rcu_read_lock(); + opp_find_freq_ceil(dev, &freq); + rcu_read_unlock(); + Example 2: A simplified implementation of a SoC cpufreq_driver->target: + soc_cpufreq_target(..) + { + /* Do stuff like policy checks etc. */ + /* Find the best frequency match for the req */ + rcu_read_lock(); + opp = opp_find_freq_ceil(dev, &freq); + rcu_read_unlock(); + if (!IS_ERR(opp)) + soc_switch_to_freq_voltage(freq); + else + /* do something when we cant satisfy the req */ + /* do other stuff */ + } + +4. OPP Availability Control Functions +===================================== +A default OPP list registered with the OPP library may not cater to all possible +situation. The OPP library provides a set of functions to modify the +availability of a OPP within the OPP list. This allows SoC frameworks to have +fine grained dynamic control of which sets of OPPs are operationally available. +These functions are intended to *temporarily* remove an OPP in conditions such +as thermal considerations (e.g. don't use OPPx until the temperature drops). + +WARNING: Do not use these functions in interrupt context. + +opp_enable - Make a OPP available for operation. + Example: Lets say that 1GHz OPP is to be made available only if the + SoC temperature is lower than a certain threshold. The SoC framework + implementation might choose to do something as follows: + if (cur_temp < temp_low_thresh) { + /* Enable 1GHz if it was disabled */ + rcu_read_lock(); + opp = opp_find_freq_exact(dev, 1000000000, false); + rcu_read_unlock(); + /* just error check */ + if (!IS_ERR(opp)) + ret = opp_enable(dev, 1000000000); + else + goto try_something_else; + } + +opp_disable - Make an OPP to be not available for operation + Example: Lets say that 1GHz OPP is to be disabled if the temperature + exceeds a threshold value. The SoC framework implementation might + choose to do something as follows: + if (cur_temp > temp_high_thresh) { + /* Disable 1GHz if it was enabled */ + rcu_read_lock(); + opp = opp_find_freq_exact(dev, 1000000000, true); + rcu_read_unlock(); + /* just error check */ + if (!IS_ERR(opp)) + ret = opp_disable(dev, 1000000000); + else + goto try_something_else; + } + +5. OPP Data Retrieval Functions +=============================== +Since OPP library abstracts away the OPP information, a set of functions to pull +information from the OPP structure is necessary. Once an OPP pointer is +retrieved using the search functions, the following functions can be used by SoC +framework to retrieve the information represented inside the OPP layer. + +opp_get_voltage - Retrieve the voltage represented by the opp pointer. + Example: At a cpufreq transition to a different frequency, SoC + framework requires to set the voltage represented by the OPP using + the regulator framework to the Power Management chip providing the + voltage. + soc_switch_to_freq_voltage(freq) + { + /* do things */ + rcu_read_lock(); + opp = opp_find_freq_ceil(dev, &freq); + v = opp_get_voltage(opp); + rcu_read_unlock(); + if (v) + regulator_set_voltage(.., v); + /* do other things */ + } + +opp_get_freq - Retrieve the freq represented by the opp pointer. + Example: Lets say the SoC framework uses a couple of helper functions + we could pass opp pointers instead of doing additional parameters to + handle quiet a bit of data parameters. + soc_cpufreq_target(..) + { + /* do things.. */ + max_freq = ULONG_MAX; + rcu_read_lock(); + max_opp = opp_find_freq_floor(dev,&max_freq); + requested_opp = opp_find_freq_ceil(dev,&freq); + if (!IS_ERR(max_opp) && !IS_ERR(requested_opp)) + r = soc_test_validity(max_opp, requested_opp); + rcu_read_unlock(); + /* do other things */ + } + soc_test_validity(..) + { + if(opp_get_voltage(max_opp) < opp_get_voltage(requested_opp)) + return -EINVAL; + if(opp_get_freq(max_opp) < opp_get_freq(requested_opp)) + return -EINVAL; + /* do things.. */ + } + +opp_get_opp_count - Retrieve the number of available opps for a device + Example: Lets say a co-processor in the SoC needs to know the available + frequencies in a table, the main processor can notify as following: + soc_notify_coproc_available_frequencies() + { + /* Do things */ + rcu_read_lock(); + num_available = opp_get_opp_count(dev); + speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL); + /* populate the table in increasing order */ + freq = 0; + while (!IS_ERR(opp = opp_find_freq_ceil(dev, &freq))) { + speeds[i] = freq; + freq++; + i++; + } + rcu_read_unlock(); + + soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available); + /* Do other things */ + } + +6. Cpufreq Table Generation +=========================== +opp_init_cpufreq_table - cpufreq framework typically is initialized with + cpufreq_frequency_table_cpuinfo which is provided with the list of + frequencies that are available for operation. This function provides + a ready to use conversion routine to translate the OPP layer's internal + information about the available frequencies into a format readily + providable to cpufreq. + + WARNING: Do not use this function in interrupt context. + + Example: + soc_pm_init() + { + /* Do things */ + r = opp_init_cpufreq_table(dev, &freq_table); + if (!r) + cpufreq_frequency_table_cpuinfo(policy, freq_table); + /* Do other things */ + } + + NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in + addition to CONFIG_PM as power management feature is required to + dynamically scale voltage and frequency in a system. + +7. Data Structures +================== +Typically an SoC contains multiple voltage domains which are variable. Each +domain is represented by a device pointer. The relationship to OPP can be +represented as follows: +SoC + |- device 1 + | |- opp 1 (availability, freq, voltage) + | |- opp 2 .. + ... ... + | `- opp n .. + |- device 2 + ... + `- device m + +OPP library maintains a internal list that the SoC framework populates and +accessed by various functions as described above. However, the structures +representing the actual OPPs and domains are internal to the OPP library itself +to allow for suitable abstraction reusable across systems. + +struct opp - The internal data structure of OPP library which is used to + represent an OPP. In addition to the freq, voltage, availability + information, it also contains internal book keeping information required + for the OPP library to operate on. Pointer to this structure is + provided back to the users such as SoC framework to be used as a + identifier for OPP in the interactions with OPP layer. + + WARNING: The struct opp pointer should not be parsed or modified by the + users. The defaults of for an instance is populated by opp_add, but the + availability of the OPP can be modified by opp_enable/disable functions. + +struct device - This is used to identify a domain to the OPP layer. The + nature of the device and it's implementation is left to the user of + OPP library such as the SoC framework. + +Overall, in a simplistic view, the data structure operations is represented as +following: + +Initialization / modification: + +-----+ /- opp_enable +opp_add --> | opp | <------- + | +-----+ \- opp_disable + \-------> domain_info(device) + +Search functions: + /-- opp_find_freq_ceil ---\ +-----+ +domain_info<---- opp_find_freq_exact -----> | opp | + \-- opp_find_freq_floor ---/ +-----+ + +Retrieval functions: ++-----+ /- opp_get_voltage +| opp | <--- ++-----+ \- opp_get_freq + +domain_info <- opp_get_opp_count diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index cbccf9a3cee4..abe46edfe5b4 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_OPS) += generic_ops.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o +obj-$(CONFIG_PM_OPP) += opp.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG ccflags-$(CONFIG_PM_VERBOSE) += -DDEBUG diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c new file mode 100644 index 000000000000..2bb9b4cf59d7 --- /dev/null +++ b/drivers/base/power/opp.c @@ -0,0 +1,628 @@ +/* + * Generic OPP Interface + * + * Copyright (C) 2009-2010 Texas Instruments Incorporated. + * Nishanth Menon + * Romit Dasgupta + * Kevin Hilman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Internal data structure organization with the OPP layer library is as + * follows: + * dev_opp_list (root) + * |- device 1 (represents voltage domain 1) + * | |- opp 1 (availability, freq, voltage) + * | |- opp 2 .. + * ... ... + * | `- opp n .. + * |- device 2 (represents the next voltage domain) + * ... + * `- device m (represents mth voltage domain) + * device 1, 2.. are represented by dev_opp structure while each opp + * is represented by the opp structure. + */ + +/** + * struct opp - Generic OPP description structure + * @node: opp list node. The nodes are maintained throughout the lifetime + * of boot. It is expected only an optimal set of OPPs are + * added to the library by the SoC framework. + * RCU usage: opp list is traversed with RCU locks. node + * modification is possible realtime, hence the modifications + * are protected by the dev_opp_list_lock for integrity. + * IMPORTANT: the opp nodes should be maintained in increasing + * order. + * @available: true/false - marks if this OPP as available or not + * @rate: Frequency in hertz + * @u_volt: Nominal voltage in microvolts corresponding to this OPP + * @dev_opp: points back to the device_opp struct this opp belongs to + * + * This structure stores the OPP information for a given device. + */ +struct opp { + struct list_head node; + + bool available; + unsigned long rate; + unsigned long u_volt; + + struct device_opp *dev_opp; +}; + +/** + * struct device_opp - Device opp structure + * @node: list node - contains the devices with OPPs that + * have been registered. Nodes once added are not modified in this + * list. + * RCU usage: nodes are not modified in the list of device_opp, + * however addition is possible and is secured by dev_opp_list_lock + * @dev: device pointer + * @opp_list: list of opps + * + * This is an internal data structure maintaining the link to opps attached to + * a device. This structure is not meant to be shared to users as it is + * meant for book keeping and private to OPP library + */ +struct device_opp { + struct list_head node; + + struct device *dev; + struct list_head opp_list; +}; + +/* + * The root of the list of all devices. All device_opp structures branch off + * from here, with each device_opp containing the list of opp it supports in + * various states of availability. + */ +static LIST_HEAD(dev_opp_list); +/* Lock to allow exclusive modification to the device and opp lists */ +static DEFINE_MUTEX(dev_opp_list_lock); + +/** + * find_device_opp() - find device_opp struct using device pointer + * @dev: device pointer used to lookup device OPPs + * + * Search list of device OPPs for one containing matching device. Does a RCU + * reader operation to grab the pointer needed. + * + * Returns pointer to 'struct device_opp' if found, otherwise -ENODEV or + * -EINVAL based on type of error. + * + * Locking: This function must be called under rcu_read_lock(). device_opp + * is a RCU protected pointer. This means that device_opp is valid as long + * as we are under RCU lock. + */ +static struct device_opp *find_device_opp(struct device *dev) +{ + struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV); + + if (unlikely(IS_ERR_OR_NULL(dev))) { + pr_err("%s: Invalid parameters\n", __func__); + return ERR_PTR(-EINVAL); + } + + list_for_each_entry_rcu(tmp_dev_opp, &dev_opp_list, node) { + if (tmp_dev_opp->dev == dev) { + dev_opp = tmp_dev_opp; + break; + } + } + + return dev_opp; +} + +/** + * opp_get_voltage() - Gets the voltage corresponding to an available opp + * @opp: opp for which voltage has to be returned for + * + * Return voltage in micro volt corresponding to the opp, else + * return 0 + * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. This means that opp which could have been fetched by + * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are + * under RCU lock. The pointer returned by the opp_find_freq family must be + * used in the same section as the usage of this function with the pointer + * prior to unlocking with rcu_read_unlock() to maintain the integrity of the + * pointer. + */ +unsigned long opp_get_voltage(struct opp *opp) +{ + struct opp *tmp_opp; + unsigned long v = 0; + + tmp_opp = rcu_dereference(opp); + if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available) + pr_err("%s: Invalid parameters\n", __func__); + else + v = tmp_opp->u_volt; + + return v; +} + +/** + * opp_get_freq() - Gets the frequency corresponding to an available opp + * @opp: opp for which frequency has to be returned for + * + * Return frequency in hertz corresponding to the opp, else + * return 0 + * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. This means that opp which could have been fetched by + * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are + * under RCU lock. The pointer returned by the opp_find_freq family must be + * used in the same section as the usage of this function with the pointer + * prior to unlocking with rcu_read_unlock() to maintain the integrity of the + * pointer. + */ +unsigned long opp_get_freq(struct opp *opp) +{ + struct opp *tmp_opp; + unsigned long f = 0; + + tmp_opp = rcu_dereference(opp); + if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available) + pr_err("%s: Invalid parameters\n", __func__); + else + f = tmp_opp->rate; + + return f; +} + +/** + * opp_get_opp_count() - Get number of opps available in the opp list + * @dev: device for which we do this operation + * + * This function returns the number of available opps if there are any, + * else returns 0 if none or the corresponding error value. + * + * Locking: This function must be called under rcu_read_lock(). This function + * internally references two RCU protected structures: device_opp and opp which + * are safe as long as we are under a common RCU locked section. + */ +int opp_get_opp_count(struct device *dev) +{ + struct device_opp *dev_opp; + struct opp *temp_opp; + int count = 0; + + dev_opp = find_device_opp(dev); + if (IS_ERR(dev_opp)) { + int r = PTR_ERR(dev_opp); + dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r); + return r; + } + + list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + if (temp_opp->available) + count++; + } + + return count; +} + +/** + * opp_find_freq_exact() - search for an exact frequency + * @dev: device for which we do this operation + * @freq: frequency to search for + * @is_available: true/false - match for available opp + * + * Searches for exact match in the opp list and returns pointer to the matching + * opp if found, else returns ERR_PTR in case of error and should be handled + * using IS_ERR. + * + * Note: available is a modifier for the search. if available=true, then the + * match is for exact matching frequency and is available in the stored OPP + * table. if false, the match is for exact frequency which is not available. + * + * This provides a mechanism to enable an opp which is not available currently + * or the opposite as well. + * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. The reason for the same is that the opp pointer which is + * returned will remain valid for use with opp_get_{voltage, freq} only while + * under the locked area. The pointer returned must be used prior to unlocking + * with rcu_read_unlock() to maintain the integrity of the pointer. + */ +struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, + bool available) +{ + struct device_opp *dev_opp; + struct opp *temp_opp, *opp = ERR_PTR(-ENODEV); + + dev_opp = find_device_opp(dev); + if (IS_ERR(dev_opp)) { + int r = PTR_ERR(dev_opp); + dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r); + return ERR_PTR(r); + } + + list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + if (temp_opp->available == available && + temp_opp->rate == freq) { + opp = temp_opp; + break; + } + } + + return opp; +} + +/** + * opp_find_freq_ceil() - Search for an rounded ceil freq + * @dev: device for which we do this operation + * @freq: Start frequency + * + * Search for the matching ceil *available* OPP from a starting freq + * for a device. + * + * Returns matching *opp and refreshes *freq accordingly, else returns + * ERR_PTR in case of error and should be handled using IS_ERR. + * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. The reason for the same is that the opp pointer which is + * returned will remain valid for use with opp_get_{voltage, freq} only while + * under the locked area. The pointer returned must be used prior to unlocking + * with rcu_read_unlock() to maintain the integrity of the pointer. + */ +struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq) +{ + struct device_opp *dev_opp; + struct opp *temp_opp, *opp = ERR_PTR(-ENODEV); + + if (!dev || !freq) { + dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq); + return ERR_PTR(-EINVAL); + } + + dev_opp = find_device_opp(dev); + if (IS_ERR(dev_opp)) + return opp; + + list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + if (temp_opp->available && temp_opp->rate >= *freq) { + opp = temp_opp; + *freq = opp->rate; + break; + } + } + + return opp; +} + +/** + * opp_find_freq_floor() - Search for a rounded floor freq + * @dev: device for which we do this operation + * @freq: Start frequency + * + * Search for the matching floor *available* OPP from a starting freq + * for a device. + * + * Returns matching *opp and refreshes *freq accordingly, else returns + * ERR_PTR in case of error and should be handled using IS_ERR. + * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. The reason for the same is that the opp pointer which is + * returned will remain valid for use with opp_get_{voltage, freq} only while + * under the locked area. The pointer returned must be used prior to unlocking + * with rcu_read_unlock() to maintain the integrity of the pointer. + */ +struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq) +{ + struct device_opp *dev_opp; + struct opp *temp_opp, *opp = ERR_PTR(-ENODEV); + + if (!dev || !freq) { + dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq); + return ERR_PTR(-EINVAL); + } + + dev_opp = find_device_opp(dev); + if (IS_ERR(dev_opp)) + return opp; + + list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) { + if (temp_opp->available) { + /* go to the next node, before choosing prev */ + if (temp_opp->rate > *freq) + break; + else + opp = temp_opp; + } + } + if (!IS_ERR(opp)) + *freq = opp->rate; + + return opp; +} + +/** + * opp_add() - Add an OPP table from a table definitions + * @dev: device for which we do this operation + * @freq: Frequency in Hz for this OPP + * @u_volt: Voltage in uVolts for this OPP + * + * This function adds an opp definition to the opp list and returns status. + * The opp is made available by default and it can be controlled using + * opp_enable/disable functions. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) +{ + struct device_opp *dev_opp = NULL; + struct opp *opp, *new_opp; + struct list_head *head; + + /* allocate new OPP node */ + new_opp = kzalloc(sizeof(struct opp), GFP_KERNEL); + if (!new_opp) { + dev_warn(dev, "%s: Unable to create new OPP node\n", __func__); + return -ENOMEM; + } + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + /* Check for existing list for 'dev' */ + dev_opp = find_device_opp(dev); + if (IS_ERR(dev_opp)) { + /* + * Allocate a new device OPP table. In the infrequent case + * where a new device is needed to be added, we pay this + * penalty. + */ + dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL); + if (!dev_opp) { + mutex_unlock(&dev_opp_list_lock); + kfree(new_opp); + dev_warn(dev, + "%s: Unable to create device OPP structure\n", + __func__); + return -ENOMEM; + } + + dev_opp->dev = dev; + INIT_LIST_HEAD(&dev_opp->opp_list); + + /* Secure the device list modification */ + list_add_rcu(&dev_opp->node, &dev_opp_list); + } + + /* populate the opp table */ + new_opp->dev_opp = dev_opp; + new_opp->rate = freq; + new_opp->u_volt = u_volt; + new_opp->available = true; + + /* Insert new OPP in order of increasing frequency */ + head = &dev_opp->opp_list; + list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) { + if (new_opp->rate < opp->rate) + break; + else + head = &opp->node; + } + + list_add_rcu(&new_opp->node, head); + mutex_unlock(&dev_opp_list_lock); + + return 0; +} + +/** + * opp_set_availability() - helper to set the availability of an opp + * @dev: device for which we do this operation + * @freq: OPP frequency to modify availability + * @availability_req: availability status requested for this opp + * + * Set the availability of an OPP with an RCU operation, opp_{enable,disable} + * share a common logic which is isolated here. + * + * Returns -EINVAL for bad pointers, -ENOMEM if no memory available for the + * copy operation, returns 0 if no modifcation was done OR modification was + * successful. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks to + * keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex locking or synchronize_rcu() blocking calls cannot be used. + */ +static int opp_set_availability(struct device *dev, unsigned long freq, + bool availability_req) +{ + struct device_opp *tmp_dev_opp, *dev_opp = NULL; + struct opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV); + int r = 0; + + /* keep the node allocated */ + new_opp = kmalloc(sizeof(struct opp), GFP_KERNEL); + if (!new_opp) { + dev_warn(dev, "%s: Unable to create OPP\n", __func__); + return -ENOMEM; + } + + mutex_lock(&dev_opp_list_lock); + + /* Find the device_opp */ + list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) { + if (dev == tmp_dev_opp->dev) { + dev_opp = tmp_dev_opp; + break; + } + } + if (IS_ERR(dev_opp)) { + r = PTR_ERR(dev_opp); + dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r); + goto unlock; + } + + /* Do we have the frequency? */ + list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) { + if (tmp_opp->rate == freq) { + opp = tmp_opp; + break; + } + } + if (IS_ERR(opp)) { + r = PTR_ERR(opp); + goto unlock; + } + + /* Is update really needed? */ + if (opp->available == availability_req) + goto unlock; + /* copy the old data over */ + *new_opp = *opp; + + /* plug in new node */ + new_opp->available = availability_req; + + list_replace_rcu(&opp->node, &new_opp->node); + mutex_unlock(&dev_opp_list_lock); + synchronize_rcu(); + + /* clean up old opp */ + new_opp = opp; + goto out; + +unlock: + mutex_unlock(&dev_opp_list_lock); +out: + kfree(new_opp); + return r; +} + +/** + * opp_enable() - Enable a specific OPP + * @dev: device for which we do this operation + * @freq: OPP frequency to enable + * + * Enables a provided opp. If the operation is valid, this returns 0, else the + * corresponding error value. It is meant to be used for users an OPP available + * after being temporarily made unavailable with opp_disable. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function indirectly uses RCU and mutex locks to keep the + * integrity of the internal data structures. Callers should ensure that + * this function is *NOT* called under RCU protection or in contexts where + * mutex locking or synchronize_rcu() blocking calls cannot be used. + */ +int opp_enable(struct device *dev, unsigned long freq) +{ + return opp_set_availability(dev, freq, true); +} + +/** + * opp_disable() - Disable a specific OPP + * @dev: device for which we do this operation + * @freq: OPP frequency to disable + * + * Disables a provided opp. If the operation is valid, this returns + * 0, else the corresponding error value. It is meant to be a temporary + * control by users to make this OPP not available until the circumstances are + * right to make it available again (with a call to opp_enable). + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function indirectly uses RCU and mutex locks to keep the + * integrity of the internal data structures. Callers should ensure that + * this function is *NOT* called under RCU protection or in contexts where + * mutex locking or synchronize_rcu() blocking calls cannot be used. + */ +int opp_disable(struct device *dev, unsigned long freq) +{ + return opp_set_availability(dev, freq, false); +} + +#ifdef CONFIG_CPU_FREQ +/** + * opp_init_cpufreq_table() - create a cpufreq table for a device + * @dev: device for which we do this operation + * @table: Cpufreq table returned back to caller + * + * Generate a cpufreq table for a provided device- this assumes that the + * opp list is already initialized and ready for usage. + * + * This function allocates required memory for the cpufreq table. It is + * expected that the caller does the required maintenance such as freeing + * the table as required. + * + * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM + * if no memory available for the operation (table is not populated), returns 0 + * if successful and table is populated. + * + * WARNING: It is important for the callers to ensure refreshing their copy of + * the table if any of the mentioned functions have been invoked in the interim. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * To simplify the logic, we pretend we are updater and hold relevant mutex here + * Callers should ensure that this function is *NOT* called under RCU protection + * or in contexts where mutex locking cannot be used. + */ +int opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table) +{ + struct device_opp *dev_opp; + struct opp *opp; + struct cpufreq_frequency_table *freq_table; + int i = 0; + + /* Pretend as if I am an updater */ + mutex_lock(&dev_opp_list_lock); + + dev_opp = find_device_opp(dev); + if (IS_ERR(dev_opp)) { + int r = PTR_ERR(dev_opp); + mutex_unlock(&dev_opp_list_lock); + dev_err(dev, "%s: Device OPP not found (%d)\n", __func__, r); + return r; + } + + freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) * + (opp_get_opp_count(dev) + 1), GFP_KERNEL); + if (!freq_table) { + mutex_unlock(&dev_opp_list_lock); + dev_warn(dev, "%s: Unable to allocate frequency table\n", + __func__); + return -ENOMEM; + } + + list_for_each_entry(opp, &dev_opp->opp_list, node) { + if (opp->available) { + freq_table[i].index = i; + freq_table[i].frequency = opp->rate / 1000; + i++; + } + } + mutex_unlock(&dev_opp_list_lock); + + freq_table[i].index = i; + freq_table[i].frequency = CPUFREQ_TABLE_END; + + *table = &freq_table[0]; + + return 0; +} +#endif /* CONFIG_CPU_FREQ */ diff --git a/include/linux/opp.h b/include/linux/opp.h new file mode 100644 index 000000000000..5449945d589f --- /dev/null +++ b/include/linux/opp.h @@ -0,0 +1,105 @@ +/* + * Generic OPP Interface + * + * Copyright (C) 2009-2010 Texas Instruments Incorporated. + * Nishanth Menon + * Romit Dasgupta + * Kevin Hilman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_OPP_H__ +#define __LINUX_OPP_H__ + +#include +#include + +struct opp; + +#if defined(CONFIG_PM_OPP) + +unsigned long opp_get_voltage(struct opp *opp); + +unsigned long opp_get_freq(struct opp *opp); + +int opp_get_opp_count(struct device *dev); + +struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq, + bool available); + +struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq); + +struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq); + +int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt); + +int opp_enable(struct device *dev, unsigned long freq); + +int opp_disable(struct device *dev, unsigned long freq); + +#else +static inline unsigned long opp_get_voltage(struct opp *opp) +{ + return 0; +} + +static inline unsigned long opp_get_freq(struct opp *opp) +{ + return 0; +} + +static inline int opp_get_opp_count(struct device *dev) +{ + return 0; +} + +static inline struct opp *opp_find_freq_exact(struct device *dev, + unsigned long freq, bool available) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct opp *opp_find_freq_floor(struct device *dev, + unsigned long *freq) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct opp *opp_find_freq_ceil(struct device *dev, + unsigned long *freq) +{ + return ERR_PTR(-EINVAL); +} + +static inline int opp_add(struct device *dev, unsigned long freq, + unsigned long u_volt) +{ + return -EINVAL; +} + +static inline int opp_enable(struct device *dev, unsigned long freq) +{ + return 0; +} + +static inline int opp_disable(struct device *dev, unsigned long freq) +{ + return 0; +} +#endif /* CONFIG_PM */ + +#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) +int opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table); +#else +static inline int opp_init_cpufreq_table(struct device *dev, + struct cpufreq_frequency_table **table) +{ + return -EINVAL; +} +#endif /* CONFIG_CPU_FREQ */ + +#endif /* __LINUX_OPP_H__ */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index e45894c696ee..29bff6117abc 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -245,3 +245,17 @@ config PM_OPS bool depends on PM_SLEEP || PM_RUNTIME default y + +config PM_OPP + bool "Operating Performance Point (OPP) Layer library" + depends on PM + ---help--- + SOCs have a standard set of tuples consisting of frequency and + voltage pairs that the device will support per voltage domain. This + is called Operating Performance Point or OPP. The actual definitions + of OPP varies over silicon within the same family of devices. + + OPP layer organizes the data internally using device pointers + representing individual voltage domains and provides SOC + implementations a ready to use framework to manage OPPs. + For more information, read -- cgit v1.2.3 From 9c034392533f3e9f00656d5c58478cff2560ef81 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 19 Oct 2010 23:42:49 +0200 Subject: PM / Wakeup: Show wakeup sources statistics in debugfs There may be wakeup sources that aren't associated with any devices and their statistics information won't be available from sysfs. Also, for debugging purposes it is convenient to have all of the wakeup sources statistics available from one place. For these reasons, introduce new file "wakeup_sources" in debugfs containing those statistics. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- drivers/base/power/wakeup.c | 85 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 03751a01dbad..71c5528e1c35 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "power.h" @@ -617,3 +619,86 @@ bool pm_save_wakeup_count(unsigned int count) pm_wakeup_update_hit_counts(); return ret; } + +static struct dentry *wakeup_sources_stats_dentry; + +/** + * print_wakeup_source_stats - Print wakeup source statistics information. + * @m: seq_file to print the statistics into. + * @ws: Wakeup source object to print the statistics for. + */ +static int print_wakeup_source_stats(struct seq_file *m, + struct wakeup_source *ws) +{ + unsigned long flags; + ktime_t total_time; + ktime_t max_time; + unsigned long active_count; + ktime_t active_time; + int ret; + + spin_lock_irqsave(&ws->lock, flags); + + total_time = ws->total_time; + max_time = ws->max_time; + active_count = ws->active_count; + if (ws->active) { + active_time = ktime_sub(ktime_get(), ws->last_time); + total_time = ktime_add(total_time, active_time); + if (active_time.tv64 > max_time.tv64) + max_time = active_time; + } else { + active_time = ktime_set(0, 0); + } + + ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t" + "%lld\t\t%lld\t\t%lld\t\t%lld\n", + ws->name, active_count, ws->event_count, ws->hit_count, + ktime_to_ms(active_time), ktime_to_ms(total_time), + ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); + + spin_unlock_irqrestore(&ws->lock, flags); + + return ret; +} + +/** + * wakeup_sources_stats_show - Print wakeup sources statistics information. + * @m: seq_file to print the statistics into. + */ +static int wakeup_sources_stats_show(struct seq_file *m, void *unused) +{ + struct wakeup_source *ws; + + seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t" + "active_since\ttotal_time\tmax_time\tlast_change\n"); + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + print_wakeup_source_stats(m, ws); + rcu_read_unlock(); + + return 0; +} + +static int wakeup_sources_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, wakeup_sources_stats_show, NULL); +} + +static const struct file_operations wakeup_sources_stats_fops = { + .owner = THIS_MODULE, + .open = wakeup_sources_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init wakeup_sources_debugfs_init(void) +{ + wakeup_sources_stats_dentry = debugfs_create_file("wakeup_sources", + S_IRUGO, NULL, NULL, &wakeup_sources_stats_fops); + return 0; +} + +postcore_initcall(wakeup_sources_debugfs_init); -- cgit v1.2.3