From c3b0795c98c08351567464150db66d11e05d7611 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 10 May 2011 21:09:38 +0200 Subject: PM / ACPI: Remove acpi_sleep=s4_nonvs acpi_sleep=s4_nonvs is superseded by acpi_sleep=nonvs, so remove it. Signed-off-by: WANG Cong Acked-by: Pavel Machek Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki --- Documentation/feature-removal-schedule.txt | 8 -------- Documentation/kernel-parameters.txt | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 492e81df2968..f6a24e8aa11e 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -460,14 +460,6 @@ Who: Thomas Gleixner ---------------------------- -What: The acpi_sleep=s4_nonvs command line option -When: 2.6.37 -Files: arch/x86/kernel/acpi/sleep.c -Why: superseded by acpi_sleep=nonvs -Who: Rafael J. Wysocki - ----------------------------- - What: PCI DMA unmap state API When: August 2012 Why: PCI DMA unmap state API (include/linux/pci-dma.h) was replaced diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cc85a9278190..259037b873b7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -245,7 +245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs, sci_force_enable } + old_ordering, nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep -- cgit v1.2.3 From ddeb648708108091a641adad0a438ec4fd8bf190 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 15 May 2011 11:38:48 +0200 Subject: PM / Hibernate: Add sysfs knob to control size of memory for drivers Martin reports that on his system hibernation occasionally fails due to the lack of memory, because the radeon driver apparently allocates too much of it during the device freeze stage. It turns out that the amount of memory allocated by radeon during hibernation (and presumably during system suspend too) depends on the utilization of the GPU (e.g. hibernating while there are two KDE 4 sessions with compositing enabled causes radeon to allocate more memory than for one KDE 4 session). In principle it should be possible to use image_size to make the memory preallocation mechanism free enough memory for the radeon driver, but in practice it is not easy to guess the right value because of the way the preallocation code uses image_size. For this reason, it seems reasonable to allow users to control the amount of memory reserved for driver allocations made after the hibernate preallocation, which currently is constant and amounts to 1 MB. Introduce a new sysfs file, /sys/power/reserved_size, whose value will be used as the amount of memory to reserve for the post-preallocation reservations made by device drivers, in bytes. For backwards compatibility, set its default (and initial) value to the currently used number (1 MB). References: https://bugzilla.kernel.org/show_bug.cgi?id=34102 Reported-and-tested-by: Martin Steigerwald Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-power | 14 ++++++++++++++ kernel/power/hibernate.c | 23 +++++++++++++++++++++++ kernel/power/main.c | 1 + kernel/power/power.h | 4 ++++ kernel/power/snapshot.c | 25 ++++++++++++++++++++----- 5 files changed, 62 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 194ca446ac28..b464d12761ba 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -158,3 +158,17 @@ Description: successful, will make the kernel abort a subsequent transition to a sleep state if any wakeup events are reported after the write has returned. + +What: /sys/power/reserved_size +Date: May 2011 +Contact: Rafael J. Wysocki +Description: + The /sys/power/reserved_size file allows user space to control + the amount of memory reserved for allocations made by device + drivers during the "device freeze" stage of hibernation. It can + be written a string representing a non-negative integer that + will be used as the amount of memory to reserve for allocations + made by device drivers' "freeze" callbacks, in bytes. + + Reading from this file will display the current value, which is + set to 1 MB by default. diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 50aae660174d..431721313b71 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -982,10 +982,33 @@ static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *att power_attr(image_size); +static ssize_t reserved_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", reserved_size); +} + +static ssize_t reserved_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long size; + + if (sscanf(buf, "%lu", &size) == 1) { + reserved_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(reserved_size); + static struct attribute * g[] = { &disk_attr.attr, &resume_attr.attr, &image_size_attr.attr, + &reserved_size_attr.attr, NULL, }; diff --git a/kernel/power/main.c b/kernel/power/main.c index de9aef8742f4..2981af4ce7cb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -337,6 +337,7 @@ static int __init pm_init(void) if (error) return error; hibernate_image_size_init(); + hibernate_reserved_size_init(); power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; diff --git a/kernel/power/power.h b/kernel/power/power.h index 03634be55f62..9a00a0a26280 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -15,6 +15,7 @@ struct swsusp_info { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ +extern void __init hibernate_reserved_size_init(void); extern void __init hibernate_image_size_init(void); #ifdef CONFIG_ARCH_HIBERNATION_HEADER @@ -55,6 +56,7 @@ extern int hibernation_platform_enter(void); #else /* !CONFIG_HIBERNATION */ +static inline void hibernate_reserved_size_init(void) {} static inline void hibernate_image_size_init(void) {} #endif /* !CONFIG_HIBERNATION */ @@ -72,6 +74,8 @@ static struct kobj_attribute _name##_attr = { \ /* Preferred image size in bytes (default 500 MB) */ extern unsigned long image_size; +/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */ +extern unsigned long reserved_size; extern int in_suspend; extern dev_t swsusp_resume_device; extern sector_t swsusp_resume_block; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ca0aacc24874..d69e3323a85d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -40,6 +40,18 @@ static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); +/* + * Number of bytes to reserve for memory allocations made by device drivers + * from their ->freeze() and ->freeze_noirq() callbacks so that they don't + * cause image creation to fail (tunable via /sys/power/reserved_size). + */ +unsigned long reserved_size; + +void __init hibernate_reserved_size_init(void) +{ + reserved_size = SPARE_PAGES * PAGE_SIZE; +} + /* * Preferred image size in bytes (tunable via /sys/power/image_size). * When it is set to N, the image creating code will do its best to @@ -1263,11 +1275,13 @@ static unsigned long minimum_image_size(unsigned long saveable) * frame in use. We also need a number of page frames to be free during * hibernation for allocations made while saving the image and for device * drivers, in case they need to allocate memory from their hibernation - * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES, - * respectively, both of which are rough estimates). To make this happen, we - * compute the total number of available page frames and allocate at least + * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough + * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through + * /sys/power/reserved_size, respectively). To make this happen, we compute the + * total number of available page frames and allocate at least * - * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES + * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE) * * of them, which corresponds to the maximum size of a hibernation image. * @@ -1322,7 +1336,8 @@ int hibernate_preallocate_memory(void) count -= totalreserve_pages; /* Compute the maximum number of saveable pages to leave in memory. */ - max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; + max_size = (count - (size + PAGES_FOR_IO)) / 2 + - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE); /* Compute the desired number of image pages specified by image_size. */ size = DIV_ROUND_UP(image_size, PAGE_SIZE); if (size > max_size) -- cgit v1.2.3 From 91e7c75ba93c48a82670d630b9daac92ff70095d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 May 2011 23:26:00 +0200 Subject: PM: Allow drivers to allocate memory from .prepare() callbacks safely If device drivers allocate substantial amounts of memory (above 1 MB) in their hibernate .freeze() callbacks (or in their legacy suspend callbcks during hibernation), the subsequent creation of hibernate image may fail due to the lack of memory. This is the case, because the drivers' .freeze() callbacks are executed after the hibernate memory preallocation has been carried out and the preallocated amount of memory may be too small to cover the new driver allocations. Unfortunately, the drivers' .prepare() callbacks also are executed after the hibernate memory preallocation has completed, so they are not suitable for allocating additional memory either. Thus the only way a driver can safely allocate memory during hibernation is to use a hibernate/suspend notifier. However, the notifiers are called before the freezing of user space and the drivers wanting to use them for allocating additional memory may not know how much memory needs to be allocated at that point. To let device drivers overcome this difficulty rework the hibernation sequence so that the memory preallocation is carried out after the drivers' .prepare() callbacks have been executed, so that the .prepare() callbacks can be used for allocating additional memory to be used by the drivers' .freeze() callbacks. Update documentation to match the new behavior of the code. Signed-off-by: Rafael J. Wysocki --- Documentation/power/devices.txt | 14 +++++++---- Documentation/power/notifiers.txt | 51 ++++++++++++++++++--------------------- drivers/base/power/main.c | 18 +++++++++----- include/linux/pm.h | 4 +++ kernel/power/hibernate.c | 17 ++++++++++--- 5 files changed, 61 insertions(+), 43 deletions(-) (limited to 'Documentation') diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 1971bcf48a60..88880839ece4 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -279,11 +279,15 @@ When the system goes into the standby or memory sleep state, the phases are: time.) Unlike the other suspend-related phases, during the prepare phase the device tree is traversed top-down. - The prepare phase uses only a bus callback. After the callback method - returns, no new children may be registered below the device. The method - may also prepare the device or driver in some way for the upcoming - system power transition, but it should not put the device into a - low-power state. + In addition to that, if device drivers need to allocate additional + memory to be able to hadle device suspend correctly, that should be + done in the prepare phase. + + After the prepare callback method returns, no new children may be + registered below the device. The method may also prepare the device or + driver in some way for the upcoming system power transition (for + example, by allocating additional memory required for this purpose), but + it should not put the device into a low-power state. 2. The suspend methods should quiesce the device to stop it from performing I/O. They also may save the device registers and put it into the diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt index cf980709122a..c2a4a346c0d9 100644 --- a/Documentation/power/notifiers.txt +++ b/Documentation/power/notifiers.txt @@ -1,46 +1,41 @@ Suspend notifiers - (C) 2007 Rafael J. Wysocki , GPL - -There are some operations that device drivers may want to carry out in their -.suspend() routines, but shouldn't, because they can cause the hibernation or -suspend to fail. For example, a driver may want to allocate a substantial amount -of memory (like 50 MB) in .suspend(), but that shouldn't be done after the -swsusp's memory shrinker has run. - -Also, there may be some operations, that subsystems want to carry out before a -hibernation/suspend or after a restore/resume, requiring the system to be fully -functional, so the drivers' .suspend() and .resume() routines are not suitable -for this purpose. For example, device drivers may want to upload firmware to -their devices after a restore from a hibernation image, but they cannot do it by -calling request_firmware() from their .resume() routines (user land processes -are frozen at this point). The solution may be to load the firmware into -memory before processes are frozen and upload it from there in the .resume() -routine. Of course, a hibernation notifier may be used for this purpose. - -The subsystems that have such needs can register suspend notifiers that will be -called upon the following events by the suspend core: + (C) 2007-2011 Rafael J. Wysocki , GPL + +There are some operations that subsystems or drivers may want to carry out +before hibernation/suspend or after restore/resume, but they require the system +to be fully functional, so the drivers' and subsystems' .suspend() and .resume() +or even .prepare() and .complete() callbacks are not suitable for this purpose. +For example, device drivers may want to upload firmware to their devices after +resume/restore, but they cannot do it by calling request_firmware() from their +.resume() or .complete() routines (user land processes are frozen at these +points). The solution may be to load the firmware into memory before processes +are frozen and upload it from there in the .resume() routine. +A suspend/hibernation notifier may be used for this purpose. + +The subsystems or drivers having such needs can register suspend notifiers that +will be called upon the following events by the PM core: PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will be frozen immediately. PM_POST_HIBERNATION The system memory state has been restored from a - hibernation image or an error occurred during the - hibernation. Device drivers' .resume() callbacks have + hibernation image or an error occurred during + hibernation. Device drivers' restore callbacks have been executed and tasks have been thawed. PM_RESTORE_PREPARE The system is going to restore a hibernation image. - If all goes well the restored kernel will issue a + If all goes well, the restored kernel will issue a PM_POST_HIBERNATION notification. -PM_POST_RESTORE An error occurred during the hibernation restore. - Device drivers' .resume() callbacks have been executed +PM_POST_RESTORE An error occurred during restore from hibernation. + Device drivers' restore callbacks have been executed and tasks have been thawed. -PM_SUSPEND_PREPARE The system is preparing for a suspend. +PM_SUSPEND_PREPARE The system is preparing for suspend. PM_POST_SUSPEND The system has just resumed or an error occurred during - the suspend. Device drivers' .resume() callbacks have - been executed and tasks have been thawed. + suspend. Device drivers' resume callbacks have been + executed and tasks have been thawed. It is generally assumed that whatever the notifiers do for PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 3b354560f306..aa6320207745 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -579,11 +579,13 @@ static bool is_async(struct device *dev) * Execute the appropriate "resume" callback for all devices whose status * indicates that they are suspended. */ -static void dpm_resume(pm_message_t state) +void dpm_resume(pm_message_t state) { struct device *dev; ktime_t starttime = ktime_get(); + might_sleep(); + mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -656,10 +658,12 @@ static void device_complete(struct device *dev, pm_message_t state) * Execute the ->complete() callbacks for all devices whose PM status is not * DPM_ON (this allows new devices to be registered). */ -static void dpm_complete(pm_message_t state) +void dpm_complete(pm_message_t state) { struct list_head list; + might_sleep(); + INIT_LIST_HEAD(&list); mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_prepared_list)) { @@ -688,7 +692,6 @@ static void dpm_complete(pm_message_t state) */ void dpm_resume_end(pm_message_t state) { - might_sleep(); dpm_resume(state); dpm_complete(state); } @@ -912,11 +915,13 @@ static int device_suspend(struct device *dev) * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices. * @state: PM transition of the system being carried out. */ -static int dpm_suspend(pm_message_t state) +int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; + might_sleep(); + mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; @@ -1003,10 +1008,12 @@ static int device_prepare(struct device *dev, pm_message_t state) * * Execute the ->prepare() callback(s) for all devices. */ -static int dpm_prepare(pm_message_t state) +int dpm_prepare(pm_message_t state) { int error = 0; + might_sleep(); + mutex_lock(&dpm_list_mtx); while (!list_empty(&dpm_list)) { struct device *dev = to_device(dpm_list.next); @@ -1055,7 +1062,6 @@ int dpm_suspend_start(pm_message_t state) { int error; - might_sleep(); error = dpm_prepare(state); if (!error) error = dpm_suspend(state); diff --git a/include/linux/pm.h b/include/linux/pm.h index 3cc3e7e589f0..dce7c7148771 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -533,10 +533,14 @@ struct dev_power_domain { extern void device_pm_lock(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_end(pm_message_t state); +extern void dpm_resume(pm_message_t state); +extern void dpm_complete(pm_message_t state); extern void device_pm_unlock(void); extern int dpm_suspend_noirq(pm_message_t state); extern int dpm_suspend_start(pm_message_t state); +extern int dpm_suspend(pm_message_t state); +extern int dpm_prepare(pm_message_t state); extern void __suspend_report_result(const char *function, void *fn, int ret); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 95a2ac40f48c..f9bec56d8825 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -327,20 +327,25 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { + pm_message_t msg = PMSG_RECOVER; int error; error = platform_begin(platform_mode); if (error) goto Close; + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto Complete_devices; + /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); if (error) - goto Close; + goto Complete_devices; suspend_console(); pm_restrict_gfp_mask(); - error = dpm_suspend_start(PMSG_FREEZE); + error = dpm_suspend(PMSG_FREEZE); if (error) goto Recover_platform; @@ -358,13 +363,17 @@ int hibernation_snapshot(int platform_mode) if (error || !in_suspend) swsusp_free(); - dpm_resume_end(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE; + dpm_resume(msg); if (error || !in_suspend) pm_restore_gfp_mask(); resume_console(); + + Complete_devices: + dpm_complete(msg); + Close: platform_end(platform_mode); return error; -- cgit v1.2.3