From a0acae0e886d44bd5ce6d2f173c1ace0fcf0d9f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:22 -0800 Subject: freezer: unexport refrigerator() and update try_to_freeze() slightly There is no reason to export two functions for entering the refrigerator. Calling refrigerator() instead of try_to_freeze() doesn't save anything noticeable or removes any race condition. * Rename refrigerator() to __refrigerator() and make it return bool indicating whether it scheduled out for freezing. * Update try_to_freeze() to return bool and relay the return value of __refrigerator() if freezing(). * Convert all refrigerator() users to try_to_freeze(). * Update documentation accordingly. * While at it, add might_sleep() to try_to_freeze(). Signed-off-by: Tejun Heo Cc: Samuel Ortiz Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Steven Whitehouse Cc: Andrew Morton Cc: Jan Kara Cc: KONISHI Ryusuke Cc: Christoph Hellwig --- include/linux/freezer.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a5386e3ee756..7a9427e9fe47 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,18 +47,17 @@ static inline bool should_send_signal(struct task_struct *p) /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); -extern void refrigerator(void); +extern bool __refrigerator(void); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); -static inline int try_to_freeze(void) +static inline bool try_to_freeze(void) { - if (freezing(current)) { - refrigerator(); - return 1; - } else - return 0; + might_sleep(); + if (likely(!freezing(current))) + return false; + return __refrigerator(); } extern bool freeze_task(struct task_struct *p, bool sig_only); @@ -181,12 +180,12 @@ static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } -static inline void refrigerator(void) {} +static inline bool __refrigerator(void) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} -static inline int try_to_freeze(void) { return 0; } +static inline bool try_to_freeze(void) { return false; } static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} -- cgit v1.2.3 From 8a32c441c1609f80e55df75422324a1151208f40 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: implement and use kthread_freezable_should_stop() Writeback and thinkpad_acpi have been using thaw_process() to prevent deadlock between the freezer and kthread_stop(); unfortunately, this is inherently racy - nothing prevents freezing from happening between thaw_process() and kthread_stop(). This patch implements kthread_freezable_should_stop() which enters refrigerator if necessary but is guaranteed to return if kthread_stop() is invoked. Both thaw_process() users are converted to use the new function. Note that this deadlock condition exists for many of freezable kthreads. They need to be converted to use the new should_stop or freezable workqueue. Tested with synthetic test case. Signed-off-by: Tejun Heo Acked-by: Henrique de Moraes Holschuh Cc: Jens Axboe Cc: Oleg Nesterov --- drivers/platform/x86/thinkpad_acpi.c | 15 ++++++--------- fs/fs-writeback.c | 4 +--- include/linux/freezer.h | 6 +++--- include/linux/kthread.h | 1 + kernel/freezer.c | 6 ++++-- kernel/kthread.c | 25 +++++++++++++++++++++++++ mm/backing-dev.c | 8 ++------ 7 files changed, 42 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 7b828680b21d..4b11fc91fa7d 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2456,8 +2456,9 @@ static int hotkey_kthread(void *data) u32 poll_mask, event_mask; unsigned int si, so; unsigned long t; - unsigned int change_detector, must_reset; + unsigned int change_detector; unsigned int poll_freq; + bool was_frozen; mutex_lock(&hotkey_thread_mutex); @@ -2488,14 +2489,14 @@ static int hotkey_kthread(void *data) t = 100; /* should never happen... */ } t = msleep_interruptible(t); - if (unlikely(kthread_should_stop())) + if (unlikely(kthread_freezable_should_stop(&was_frozen))) break; - must_reset = try_to_freeze(); - if (t > 0 && !must_reset) + + if (t > 0 && !was_frozen) continue; mutex_lock(&hotkey_thread_data_mutex); - if (must_reset || hotkey_config_change != change_detector) { + if (was_frozen || hotkey_config_change != change_detector) { /* forget old state on thaw or config change */ si = so; t = 0; @@ -2528,10 +2529,6 @@ exit: static void hotkey_poll_stop_sync(void) { if (tpacpi_hotkey_task) { - if (frozen(tpacpi_hotkey_task) || - freezing(tpacpi_hotkey_task)) - thaw_process(tpacpi_hotkey_task); - kthread_stop(tpacpi_hotkey_task); tpacpi_hotkey_task = NULL; mutex_lock(&hotkey_thread_mutex); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 73c3992b2bb4..271fde50f0ee 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -947,7 +947,7 @@ int bdi_writeback_thread(void *data) trace_writeback_thread_start(bdi); - while (!kthread_should_stop()) { + while (!kthread_freezable_should_stop(NULL)) { /* * Remove own delayed wake-up timer, since we are already awake * and we'll take care of the preriodic write-back. @@ -977,8 +977,6 @@ int bdi_writeback_thread(void *data) */ schedule(); } - - try_to_freeze(); } /* Flush any work that raced with us exiting */ diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 7a9427e9fe47..d02b78448b0f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,7 +47,7 @@ static inline bool should_send_signal(struct task_struct *p) /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); -extern bool __refrigerator(void); +extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); @@ -57,7 +57,7 @@ static inline bool try_to_freeze(void) might_sleep(); if (likely(!freezing(current))) return false; - return __refrigerator(); + return __refrigerator(false); } extern bool freeze_task(struct task_struct *p, bool sig_only); @@ -180,7 +180,7 @@ static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } -static inline bool __refrigerator(void) { return false; } +static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 5cac19b3a266..0714b24c0e45 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -35,6 +35,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void kthread_bind(struct task_struct *k, unsigned int cpu); int kthread_stop(struct task_struct *k); int kthread_should_stop(void); +bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); int kthreadd(void *unused); diff --git a/kernel/freezer.c b/kernel/freezer.c index 732f14f5944f..b83c30e9483a 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -9,6 +9,7 @@ #include #include #include +#include /* * freezing is complete, mark current process as frozen @@ -23,7 +24,7 @@ static inline void frozen_process(void) } /* Refrigerator is place where frozen processes are stored :-). */ -bool __refrigerator(void) +bool __refrigerator(bool check_kthr_stop) { /* Hmm, should we be allowed to suspend when there are realtime processes around? */ @@ -50,7 +51,8 @@ bool __refrigerator(void) for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!frozen(current)) + if (!frozen(current) || + (check_kthr_stop && kthread_should_stop())) break; was_frozen = true; schedule(); diff --git a/kernel/kthread.c b/kernel/kthread.c index b6d216a92639..1c36deaae2f1 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -58,6 +58,31 @@ int kthread_should_stop(void) } EXPORT_SYMBOL(kthread_should_stop); +/** + * kthread_freezable_should_stop - should this freezable kthread return now? + * @was_frozen: optional out parameter, indicates whether %current was frozen + * + * kthread_should_stop() for freezable kthreads, which will enter + * refrigerator if necessary. This function is safe from kthread_stop() / + * freezer deadlock and freezable kthreads should use this function instead + * of calling try_to_freeze() directly. + */ +bool kthread_freezable_should_stop(bool *was_frozen) +{ + bool frozen = false; + + might_sleep(); + + if (unlikely(freezing(current))) + frozen = __refrigerator(true); + + if (was_frozen) + *was_frozen = frozen; + + return kthread_should_stop(); +} +EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); + /** * kthread_data - return data value specified on kthread creation * @task: kthread task in question diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 71034f41a2ba..7ba8feae11b8 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) /* * Finally, kill the kernel thread. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. Force - * unfreeze of the thread before calling kthread_stop(), otherwise - * it would never exet if it is currently stuck in the refrigerator. + * safe anymore, since the bdi is gone from visibility. */ - if (bdi->wb.task) { - thaw_process(bdi->wb.task); + if (bdi->wb.task) kthread_stop(bdi->wb.task); - } } /* -- cgit v1.2.3 From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: rename thaw_process() to __thaw_task() and simplify the implementation thaw_process() now has only internal users - system and cgroup freezers. Remove the unnecessary return value, rename, unexport and collapse __thaw_process() into it. This will help further updates to the freezer code. -v3: oom_kill grew a use of thaw_process() while this patch was pending. Convert it to use __thaw_task() for now. In the longer term, this should be handled by allowing tasks to die if killed even if it's frozen. -v2: minor style update as suggested by Matt. Signed-off-by: Tejun Heo Cc: Paul Menage Cc: Matt Helsley --- include/linux/freezer.h | 3 +-- kernel/cgroup_freezer.c | 7 +++---- kernel/freezer.c | 31 ++++++++++++------------------- kernel/power/process.c | 2 +- mm/oom_kill.c | 2 +- 5 files changed, 18 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index d02b78448b0f..ba4f512d2938 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -45,7 +45,7 @@ static inline bool should_send_signal(struct task_struct *p) } /* Takes and releases task alloc lock using task_lock() */ -extern int thaw_process(struct task_struct *p); +extern void __thaw_task(struct task_struct *t); extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); @@ -178,7 +178,6 @@ static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} -static inline int thaw_process(struct task_struct *p) { return 1; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 5e828a2ca8e6..a6d405a86ee0 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -130,7 +130,7 @@ struct cgroup_subsys freezer_subsys; * write_lock css_set_lock (cgroup iterator start) * task->alloc_lock * read_lock css_set_lock (cgroup iterator start) - * task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) + * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) * sighand->siglock */ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, @@ -300,9 +300,8 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) struct task_struct *task; cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - thaw_process(task); - } + while ((task = cgroup_iter_next(cgroup, &it))) + __thaw_task(task); cgroup_iter_end(cgroup, &it); freezer->state = CGROUP_THAWED; diff --git a/kernel/freezer.c b/kernel/freezer.c index b83c30e9483a..c851d588e29f 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -145,18 +145,8 @@ void cancel_freezing(struct task_struct *p) } } -static int __thaw_process(struct task_struct *p) -{ - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - return 1; - } - clear_freeze_flag(p); - return 0; -} - /* - * Wake up a frozen process + * Wake up a frozen task * * task_lock() is needed to prevent the race with refrigerator() which may * occur if the freezing of tasks fails. Namely, without the lock, if the @@ -164,15 +154,18 @@ static int __thaw_process(struct task_struct *p) * refrigerator() could call frozen_process(), in which case the task would be * frozen and no one would thaw it. */ -int thaw_process(struct task_struct *p) +void __thaw_task(struct task_struct *p) { + bool was_frozen; + task_lock(p); - if (__thaw_process(p) == 1) { - task_unlock(p); - wake_up_process(p); - return 1; - } + was_frozen = frozen(p); + if (was_frozen) + p->flags &= ~PF_FROZEN; + else + clear_freeze_flag(p); task_unlock(p); - return 0; + + if (was_frozen) + wake_up_process(p); } -EXPORT_SYMBOL(thaw_process); diff --git a/kernel/power/process.c b/kernel/power/process.c index addbbe5531bc..fe2787207f00 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -186,7 +186,7 @@ static void thaw_tasks(bool nosig_only) if (cgroup_freezing_or_frozen(p)) continue; - thaw_process(p); + __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 76f2c5ae908e..3134ee2fb2e8 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -328,7 +328,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) { if (unlikely(frozen(p))) - thaw_process(p); + __thaw_task(p); return ERR_PTR(-1UL); } if (!p->mm) -- cgit v1.2.3 From 376fede80e74d98b49d1ba9ac18f23c9fd026ddd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: kill PF_FREEZING With the previous changes, there's no meaningful difference between PF_FREEZING and PF_FROZEN. Remove PF_FREEZING and use PF_FROZEN instead in task_contributes_to_load(). Signed-off-by: Tejun Heo --- include/linux/sched.h | 3 +-- kernel/freezer.c | 6 ------ 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f27e2c..d12bd03b688f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -220,7 +220,7 @@ extern char ___assert_task_state[1 - 2*!!( ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FREEZING) == 0) + (task->flags & PF_FROZEN) == 0) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -1773,7 +1773,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ diff --git a/kernel/freezer.c b/kernel/freezer.c index a257ecd37c48..b8b562124ba9 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -44,9 +44,6 @@ repeat: recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(¤t->sighand->siglock); - /* prevent accounting of that task to load */ - current->flags |= PF_FREEZING; - for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!freezing(current) || @@ -56,9 +53,6 @@ repeat: schedule(); } - /* Remove the accounting blocker */ - current->flags &= ~PF_FREEZING; - /* leave FROZEN */ spin_lock_irq(&freezer_lock); if (freezing(current)) -- cgit v1.2.3 From 03afed8bc296fa70186ba832c1126228bb992465 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: clean up freeze_processes() failure path freeze_processes() failure path is rather messy. Freezing is canceled for workqueues and tasks which aren't frozen yet but frozen tasks are left alone and should be thawed by the caller and of course some callers (xen and kexec) didn't do it. This patch updates __thaw_task() to handle cancelation correctly and makes freeze_processes() and freeze_kernel_threads() call thaw_processes() on failure instead so that the system is fully thawed on failure. Unnecessary [suspend_]thaw_processes() calls are removed from kernel/power/hibernate.c, suspend.c and user.c. While at it, restructure error checking if clause in suspend_prepare() to be less weird. -v2: Srivatsa spotted missing removal of suspend_thaw_processes() in suspend_prepare() and error in commit message. Updated. Signed-off-by: Tejun Heo Acked-by: Srivatsa S. Bhat --- include/linux/freezer.h | 1 - kernel/freezer.c | 25 +++++++++---------------- kernel/power/hibernate.c | 15 ++------------- kernel/power/process.c | 16 ++++++++-------- kernel/power/suspend.c | 8 +++----- kernel/power/user.c | 4 +--- 6 files changed, 23 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index ba4f512d2938..93f411a52872 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -61,7 +61,6 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p, bool sig_only); -extern void cancel_freezing(struct task_struct *p); #ifdef CONFIG_CGROUP_FREEZER extern int cgroup_freezing_or_frozen(struct task_struct *task); diff --git a/kernel/freezer.c b/kernel/freezer.c index b8b562124ba9..11e32d419dec 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -129,21 +129,6 @@ out_unlock: return ret; } -void cancel_freezing(struct task_struct *p) -{ - unsigned long flags; - - spin_lock_irqsave(&freezer_lock, flags); - if (freezing(p)) { - pr_debug(" clean up: %s\n", p->comm); - clear_freeze_flag(p); - spin_lock(&p->sighand->siglock); - recalc_sigpending_and_wake(p); - spin_unlock(&p->sighand->siglock); - } - spin_unlock_irqrestore(&freezer_lock, flags); -} - void __thaw_task(struct task_struct *p) { unsigned long flags; @@ -153,10 +138,18 @@ void __thaw_task(struct task_struct *p) * be visible to @p as waking up implies wmb. Waking up inside * freezer_lock also prevents wakeups from leaking outside * refrigerator. + * + * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to + * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); clear_freeze_flag(p); - if (frozen(p)) + if (frozen(p)) { wake_up_process(p); + } else { + spin_lock(&p->sighand->siglock); + recalc_sigpending_and_wake(p); + spin_unlock(&p->sighand->siglock); + } spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 196c01268ebd..ba2319ffc860 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -607,17 +607,6 @@ static void power_down(void) while(1); } -static int prepare_processes(void) -{ - int error = 0; - - if (freeze_processes()) { - error = -EBUSY; - thaw_processes(); - } - return error; -} - /** * hibernate - Carry out system hibernation, including saving the image. */ @@ -650,7 +639,7 @@ int hibernate(void) sys_sync(); printk("done.\n"); - error = prepare_processes(); + error = freeze_processes(); if (error) goto Finish; @@ -811,7 +800,7 @@ static int software_resume(void) goto close_finish; pr_debug("PM: Preparing processes for restore.\n"); - error = prepare_processes(); + error = freeze_processes(); if (error) { swsusp_close(FMODE_READ); goto Done; diff --git a/kernel/power/process.c b/kernel/power/process.c index e59676f5811d..ce643838a00c 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -91,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only) elapsed_csecs = elapsed_csecs64; if (todo) { - /* This does not unfreeze processes that are already frozen - * (we have slightly ugly calling convention in that respect, - * and caller must call thaw_processes() if something fails), - * but it cleans up leftover PF_FREEZE requests. - */ printk("\n"); printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", @@ -103,14 +98,11 @@ static int try_to_freeze_tasks(bool sig_only) elapsed_csecs / 100, elapsed_csecs % 100, todo - wq_busy, wq_busy); - thaw_workqueues(); - read_lock(&tasklist_lock); do_each_thread(g, p) { if (!wakeup && !freezer_should_skip(p) && freezing(p) && !frozen(p)) sched_show_task(p); - cancel_freezing(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else { @@ -123,6 +115,8 @@ static int try_to_freeze_tasks(bool sig_only) /** * freeze_processes - Signal user space processes to enter the refrigerator. + * + * On success, returns 0. On failure, -errno and system is fully thawed. */ int freeze_processes(void) { @@ -137,11 +131,15 @@ int freeze_processes(void) printk("\n"); BUG_ON(in_atomic()); + if (error) + thaw_processes(); return error; } /** * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. + * + * On success, returns 0. On failure, -errno and system is fully thawed. */ int freeze_kernel_threads(void) { @@ -155,6 +153,8 @@ int freeze_kernel_threads(void) printk("\n"); BUG_ON(in_atomic()); + if (error) + thaw_processes(); return error; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4953dc054c53..d336b27d1104 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -106,13 +106,11 @@ static int suspend_prepare(void) goto Finish; error = suspend_freeze_processes(); - if (error) { - suspend_stats.failed_freeze++; - dpm_save_failed_step(SUSPEND_FREEZE); - } else + if (!error) return 0; - suspend_thaw_processes(); + suspend_stats.failed_freeze++; + dpm_save_failed_step(SUSPEND_FREEZE); usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); diff --git a/kernel/power/user.c b/kernel/power/user.c index 6d8f535c2b88..7cc3f5bc5c24 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -257,10 +257,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; error = freeze_processes(); - if (error) { - thaw_processes(); + if (error) usermodehelper_enable(); - } if (!error) data->frozen = 1; break; -- cgit v1.2.3 From 22b4e111fa01a1147aa562ceaf18a752a928ef4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: cgroup_freezer: prepare for removal of TIF_FREEZE TIF_FREEZE will be removed soon and freezing() will directly test whether any freezing condition is in effect. Make the following changes in preparation. * Rename cgroup_freezing_or_frozen() to cgroup_freezing() and make it return bool. * Make cgroup_freezing() access task_freezer() under rcu read lock instead of task_lock(). This makes the state dereferencing racy against task moving to another cgroup; however, it was already racy without this change as ->state dereference wasn't synchronized. This will be later dealt with using attach hooks. * freezer->state is now set before trying to push tasks into the target state. -v2: Oleg pointed out that freeze_change_state() was setting freeze->state incorrectly to CGROUP_FROZEN instead of CGROUP_FREEZING. Fixed. -v3: Matt pointed out that setting CGROUP_FROZEN used to always invoke try_to_freeze_cgroup() regardless of the current state. Patch updated such that the actual freeze/thaw operations are always performed on invocation. This shouldn't make any difference unless something is broken. Signed-off-by: Tejun Heo Acked-by: Paul Menage Cc: Li Zefan Cc: Oleg Nesterov --- include/linux/freezer.h | 6 +++--- kernel/cgroup_freezer.c | 40 +++++++++++++--------------------------- kernel/power/process.c | 2 +- 3 files changed, 17 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 93f411a52872..b2b4abc5a739 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -63,11 +63,11 @@ static inline bool try_to_freeze(void) extern bool freeze_task(struct task_struct *p, bool sig_only); #ifdef CONFIG_CGROUP_FREEZER -extern int cgroup_freezing_or_frozen(struct task_struct *task); +extern bool cgroup_freezing(struct task_struct *task); #else /* !CONFIG_CGROUP_FREEZER */ -static inline int cgroup_freezing_or_frozen(struct task_struct *task) +static inline bool cgroup_freezing(struct task_struct *task) { - return 0; + return false; } #endif /* !CONFIG_CGROUP_FREEZER */ diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index cd27b0825560..e6a1b8d1b8bc 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task) struct freezer, css); } -static inline int __cgroup_freezing_or_frozen(struct task_struct *task) +bool cgroup_freezing(struct task_struct *task) { - enum freezer_state state = task_freezer(task)->state; - return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); -} + enum freezer_state state; + bool ret; -int cgroup_freezing_or_frozen(struct task_struct *task) -{ - int result; - task_lock(task); - result = __cgroup_freezing_or_frozen(task); - task_unlock(task); - return result; + rcu_read_lock(); + state = task_freezer(task)->state; + ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN; + rcu_read_unlock(); + + return ret; } /* @@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys; * freezer_can_attach(): * cgroup_mutex (held by caller of can_attach) * - * cgroup_freezing_or_frozen(): - * task->alloc_lock (to get task's cgroup) - * * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): * freezer->lock * sighand->siglock (if the cgroup is freezing) @@ -177,13 +172,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss, static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { - rcu_read_lock(); - if (__cgroup_freezing_or_frozen(tsk)) { - rcu_read_unlock(); - return -EBUSY; - } - rcu_read_unlock(); - return 0; + return cgroup_freezing(tsk) ? -EBUSY : 0; } static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) @@ -279,7 +268,6 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) struct task_struct *task; unsigned int num_cant_freeze_now = 0; - freezer->state = CGROUP_FREEZING; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { if (!freeze_task(task, true)) @@ -303,8 +291,6 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) while ((task = cgroup_iter_next(cgroup, &it))) __thaw_task(task); cgroup_iter_end(cgroup, &it); - - freezer->state = CGROUP_THAWED; } static int freezer_change_state(struct cgroup *cgroup, @@ -318,20 +304,20 @@ static int freezer_change_state(struct cgroup *cgroup, spin_lock_irq(&freezer->lock); update_if_frozen(cgroup, freezer); - if (goal_state == freezer->state) - goto out; switch (goal_state) { case CGROUP_THAWED: + freezer->state = CGROUP_THAWED; unfreeze_cgroup(cgroup, freezer); break; case CGROUP_FROZEN: + freezer->state = CGROUP_FREEZING; retval = try_to_freeze_cgroup(cgroup, freezer); break; default: BUG(); } -out: + spin_unlock_irq(&freezer->lock); return retval; diff --git a/kernel/power/process.c b/kernel/power/process.c index ce643838a00c..9f6f5c755cfa 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -170,7 +170,7 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (cgroup_freezing_or_frozen(p)) + if (cgroup_freezing(p)) continue; __thaw_task(p); -- cgit v1.2.3 From a3201227f803ad7fd43180c5195dbe5a2bf998aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE Using TIF_FREEZE for freezing worked when there was only single freezing condition (the PM one); however, now there is also the cgroup_freezer and single bit flag is getting clumsy. thaw_processes() is already testing whether cgroup freezing in in effect to avoid thawing tasks which were frozen by both PM and cgroup freezers. This is racy (nothing prevents race against cgroup freezing) and fragile. A much simpler way is to test actual freeze conditions from freezing() - ie. directly test whether PM or cgroup freezing is in effect. This patch adds variables to indicate whether and what type of freezing conditions are in effect and reimplements freezing() such that it directly tests whether any of the two freezing conditions is active and the task should freeze. On fast path, freezing() is still very cheap - it only tests system_freezing_cnt. This makes the clumsy dancing aroung TIF_FREEZE unnecessary and freeze/thaw operations more usual - updating state variables for the new state and nudging target tasks so that they notice the new state and comply. As long as the nudging happens after state update, it's race-free. * This allows use of freezing() in freeze_task(). Replace the open coded tests with freezing(). * p != current test is added to warning printing conditions in try_to_freeze_tasks() failure path. This is necessary as freezing() is now true for the task which initiated freezing too. -v2: Oleg pointed out that re-freezing FROZEN cgroup could increment system_freezing_cnt. Fixed. Signed-off-by: Tejun Heo Acked-by: Paul Menage (for the cgroup portions) --- include/linux/freezer.h | 33 ++++++++++---------------- kernel/cgroup_freezer.c | 10 +++++++- kernel/fork.c | 1 - kernel/freezer.c | 62 +++++++++++++++++++++++++++++++------------------ kernel/power/process.c | 15 ++++++++---- 5 files changed, 72 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index b2b4abc5a739..8e29f2b7ce11 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -5,8 +5,13 @@ #include #include +#include #ifdef CONFIG_FREEZER +extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ +extern bool pm_freezing; /* PM freezing in effect */ +extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ + /* * Check if a process has been frozen */ @@ -15,28 +20,16 @@ static inline int frozen(struct task_struct *p) return p->flags & PF_FROZEN; } -/* - * Check if there is a request to freeze a process - */ -static inline int freezing(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_FREEZE); -} +extern bool freezing_slow_path(struct task_struct *p); /* - * Request that a process be frozen - */ -static inline void set_freeze_flag(struct task_struct *p) -{ - set_tsk_thread_flag(p, TIF_FREEZE); -} - -/* - * Sometimes we may need to cancel the previous 'freeze' request + * Check if there is a request to freeze a process */ -static inline void clear_freeze_flag(struct task_struct *p) +static inline bool freezing(struct task_struct *p) { - clear_tsk_thread_flag(p, TIF_FREEZE); + if (likely(!atomic_read(&system_freezing_cnt))) + return false; + return freezing_slow_path(p); } static inline bool should_send_signal(struct task_struct *p) @@ -174,9 +167,7 @@ static inline void set_freezable_with_signal(void) }) #else /* !CONFIG_FREEZER */ static inline int frozen(struct task_struct *p) { return 0; } -static inline int freezing(struct task_struct *p) { return 0; } -static inline void set_freeze_flag(struct task_struct *p) {} -static inline void clear_freeze_flag(struct task_struct *p) {} +static inline bool freezing(struct task_struct *p) { return false; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e6a1b8d1b8bc..2327ad11725f 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -145,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, static void freezer_destroy(struct cgroup_subsys *ss, struct cgroup *cgroup) { - kfree(cgroup_freezer(cgroup)); + struct freezer *freezer = cgroup_freezer(cgroup); + + if (freezer->state != CGROUP_THAWED) + atomic_dec(&system_freezing_cnt); + kfree(freezer); } /* @@ -307,10 +311,14 @@ static int freezer_change_state(struct cgroup *cgroup, switch (goal_state) { case CGROUP_THAWED: + if (freezer->state != CGROUP_THAWED) + atomic_dec(&system_freezing_cnt); freezer->state = CGROUP_THAWED; unfreeze_cgroup(cgroup, freezer); break; case CGROUP_FROZEN: + if (freezer->state == CGROUP_THAWED) + atomic_inc(&system_freezing_cnt); freezer->state = CGROUP_FREEZING; retval = try_to_freeze_cgroup(cgroup, freezer); break; diff --git a/kernel/fork.c b/kernel/fork.c index ba0d17261329..d53316e88d9d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -997,7 +997,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) new_flags |= PF_FORKNOEXEC; new_flags |= PF_STARTING; p->flags = new_flags; - clear_freeze_flag(p); } SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) diff --git a/kernel/freezer.c b/kernel/freezer.c index 11e32d419dec..f53cd5aa5b2e 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -11,9 +11,41 @@ #include #include +/* total number of freezing conditions in effect */ +atomic_t system_freezing_cnt = ATOMIC_INIT(0); +EXPORT_SYMBOL(system_freezing_cnt); + +/* indicate whether PM freezing is in effect, protected by pm_mutex */ +bool pm_freezing; +bool pm_nosig_freezing; + /* protects freezing and frozen transitions */ static DEFINE_SPINLOCK(freezer_lock); +/** + * freezing_slow_path - slow path for testing whether a task needs to be frozen + * @p: task to be tested + * + * This function is called by freezing() if system_freezing_cnt isn't zero + * and tests whether @p needs to enter and stay in frozen state. Can be + * called under any context. The freezers are responsible for ensuring the + * target tasks see the updated state. + */ +bool freezing_slow_path(struct task_struct *p) +{ + if (p->flags & PF_NOFREEZE) + return false; + + if (pm_nosig_freezing || cgroup_freezing(p)) + return true; + + if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG)) + return true; + + return false; +} +EXPORT_SYMBOL(freezing_slow_path); + /* Refrigerator is place where frozen processes are stored :-). */ bool __refrigerator(bool check_kthr_stop) { @@ -23,17 +55,11 @@ bool __refrigerator(bool check_kthr_stop) long save; /* - * Enter FROZEN. If NOFREEZE, schedule immediate thawing by - * clearing freezing. + * No point in checking freezing() again - the caller already did. + * Proceed to enter FROZEN. */ spin_lock_irq(&freezer_lock); repeat: - if (!freezing(current)) { - spin_unlock_irq(&freezer_lock); - return was_frozen; - } - if (current->flags & PF_NOFREEZE) - clear_freeze_flag(current); current->flags |= PF_FROZEN; spin_unlock_irq(&freezer_lock); @@ -99,18 +125,12 @@ static void fake_signal_wake_up(struct task_struct *p) bool freeze_task(struct task_struct *p, bool sig_only) { unsigned long flags; - bool ret = false; spin_lock_irqsave(&freezer_lock, flags); - - if ((p->flags & PF_NOFREEZE) || - (sig_only && !should_send_signal(p))) - goto out_unlock; - - if (frozen(p)) - goto out_unlock; - - set_freeze_flag(p); + if (!freezing(p) || frozen(p)) { + spin_unlock_irqrestore(&freezer_lock, flags); + return false; + } if (should_send_signal(p)) { fake_signal_wake_up(p); @@ -123,10 +143,9 @@ bool freeze_task(struct task_struct *p, bool sig_only) } else { wake_up_state(p, TASK_INTERRUPTIBLE); } - ret = true; -out_unlock: + spin_unlock_irqrestore(&freezer_lock, flags); - return ret; + return true; } void __thaw_task(struct task_struct *p) @@ -143,7 +162,6 @@ void __thaw_task(struct task_struct *p) * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); - clear_freeze_flag(p); if (frozen(p)) { wake_up_process(p); } else { diff --git a/kernel/power/process.c b/kernel/power/process.c index 9f6f5c755cfa..0beb51e1dec9 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -101,7 +101,7 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { if (!wakeup && !freezer_should_skip(p) && - freezing(p) && !frozen(p)) + p != current && freezing(p) && !frozen(p)) sched_show_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); @@ -122,7 +122,11 @@ int freeze_processes(void) { int error; + if (!pm_freezing) + atomic_inc(&system_freezing_cnt); + printk("Freezing user space processes ... "); + pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) { printk("done."); @@ -146,6 +150,7 @@ int freeze_kernel_threads(void) int error; printk("Freezing remaining freezable tasks ... "); + pm_nosig_freezing = true; error = try_to_freeze_tasks(false); if (!error) printk("done."); @@ -162,6 +167,11 @@ void thaw_processes(void) { struct task_struct *g, *p; + if (pm_freezing) + atomic_dec(&system_freezing_cnt); + pm_freezing = false; + pm_nosig_freezing = false; + oom_killer_enable(); printk("Restarting tasks ... "); @@ -170,9 +180,6 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (cgroup_freezing(p)) - continue; - __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); -- cgit v1.2.3 From 948246f70a811c872b9d93bb4a8ab5823c4c79e0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: remove should_send_signal() and update frozen() should_send_signal() is only used in freezer.c. Exporting them only increases chance of abuse. Open code the two users and remove it. Update frozen() to return bool. Signed-off-by: Tejun Heo --- include/linux/freezer.h | 9 ++------- kernel/freezer.c | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 8e29f2b7ce11..3d50913d39d0 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -15,7 +15,7 @@ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ /* * Check if a process has been frozen */ -static inline int frozen(struct task_struct *p) +static inline bool frozen(struct task_struct *p) { return p->flags & PF_FROZEN; } @@ -32,11 +32,6 @@ static inline bool freezing(struct task_struct *p) return freezing_slow_path(p); } -static inline bool should_send_signal(struct task_struct *p) -{ - return !(p->flags & PF_FREEZER_NOSIG); -} - /* Takes and releases task alloc lock using task_lock() */ extern void __thaw_task(struct task_struct *t); @@ -166,7 +161,7 @@ static inline void set_freezable_with_signal(void) __retval; \ }) #else /* !CONFIG_FREEZER */ -static inline int frozen(struct task_struct *p) { return 0; } +static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } diff --git a/kernel/freezer.c b/kernel/freezer.c index f53cd5aa5b2e..95a123844241 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -132,7 +132,7 @@ bool freeze_task(struct task_struct *p, bool sig_only) return false; } - if (should_send_signal(p)) { + if (!(p->flags & PF_FREEZER_NOSIG)) { fake_signal_wake_up(p); /* * fake_signal_wake_up() goes through p's scheduler -- cgit v1.2.3 From 96ee6d8539c9fc6742908d85eb9723abb5c91854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: fix set_freezable[_with_signal]() race A kthread doing set_freezable*() may race with on-going PM freeze and the freezer might think all tasks are frozen while the new freezable kthread is merrily proceeding to execute code paths which aren't supposed to be executing during PM freeze. Reimplement set_freezable[_with_signal]() using __set_freezable() such that freezable PF flags are modified under freezer_lock and try_to_freeze() is called afterwards. This eliminates race condition against freezing. Note: Separated out from larger patch to resolve fix order dependency Oleg pointed out. Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- include/linux/freezer.h | 9 +++++---- kernel/freezer.c | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 3d50913d39d0..a0f1b3a3604f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -49,6 +49,7 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p, bool sig_only); +extern bool __set_freezable(bool with_signal); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); @@ -106,18 +107,18 @@ static inline int freezer_should_skip(struct task_struct *p) /* * Tell the freezer that the current task should be frozen by it */ -static inline void set_freezable(void) +static inline bool set_freezable(void) { - current->flags &= ~PF_NOFREEZE; + return __set_freezable(false); } /* * Tell the freezer that the current task should be frozen by it and that it * should send a fake signal to the task to freeze it. */ -static inline void set_freezable_with_signal(void) +static inline bool set_freezable_with_signal(void) { - current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); + return __set_freezable(true); } /* diff --git a/kernel/freezer.c b/kernel/freezer.c index 95a123844241..b1e7a7b3d2cd 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -171,3 +171,28 @@ void __thaw_task(struct task_struct *p) } spin_unlock_irqrestore(&freezer_lock, flags); } + +/** + * __set_freezable - make %current freezable + * @with_signal: do we want %TIF_SIGPENDING for notification too? + * + * Mark %current freezable and enter refrigerator if necessary. + */ +bool __set_freezable(bool with_signal) +{ + might_sleep(); + + /* + * Modify flags while holding freezer_lock. This ensures the + * freezer notices that we aren't frozen yet or the freezing + * condition is visible to try_to_freeze() below. + */ + spin_lock_irq(&freezer_lock); + current->flags &= ~PF_NOFREEZE; + if (with_signal) + current->flags &= ~PF_FREEZER_NOSIG; + spin_unlock_irq(&freezer_lock); + + return try_to_freeze(); +} +EXPORT_SYMBOL(__set_freezable); -- cgit v1.2.3 From 839e3407d90a810318d17c17ceb3d5928a910704 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: remove unused @sig_only from freeze_task() After "freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE", freezing() returns authoritative answer on whether the current task should freeze or not and freeze_task() doesn't need or use @sig_only. Remove it. While at it, rewrite function comment for freeze_task() and rename @sig_only to @user_only in try_to_freeze_tasks(). This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- include/linux/freezer.h | 2 +- kernel/cgroup_freezer.c | 4 ++-- kernel/freezer.c | 21 +++++++++------------ kernel/power/process.c | 8 ++++---- 4 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a0f1b3a3604f..a28842e588f4 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -48,7 +48,7 @@ static inline bool try_to_freeze(void) return __refrigerator(false); } -extern bool freeze_task(struct task_struct *p, bool sig_only); +extern bool freeze_task(struct task_struct *p); extern bool __set_freezable(bool with_signal); #ifdef CONFIG_CGROUP_FREEZER diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2327ad11725f..e411a60cc2c8 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -206,7 +206,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) /* Locking avoids race with FREEZING -> THAWED transitions. */ if (freezer->state == CGROUP_FREEZING) - freeze_task(task, true); + freeze_task(task); spin_unlock_irq(&freezer->lock); } @@ -274,7 +274,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { - if (!freeze_task(task, true)) + if (!freeze_task(task)) continue; if (frozen(task)) continue; diff --git a/kernel/freezer.c b/kernel/freezer.c index 389549f0a94e..2589a61de44c 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -100,20 +100,17 @@ static void fake_signal_wake_up(struct task_struct *p) } /** - * freeze_task - send a freeze request to given task - * @p: task to send the request to - * @sig_only: if set, the request will only be sent if the task has the - * PF_FREEZER_NOSIG flag unset - * Return value: 'false', if @sig_only is set and the task has - * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise + * freeze_task - send a freeze request to given task + * @p: task to send the request to * - * The freeze request is sent by setting the tasks's TIF_FREEZE flag and - * either sending a fake signal to it or waking it up, depending on whether - * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task - * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its - * TIF_FREEZE flag will not be set. + * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE + * flag and either sending a fake signal to it or waking it up, depending + * on whether it has %PF_FREEZER_NOSIG set. + * + * RETURNS: + * %false, if @p is not freezing or already frozen; %true, otherwise */ -bool freeze_task(struct task_struct *p, bool sig_only) +bool freeze_task(struct task_struct *p) { unsigned long flags; diff --git a/kernel/power/process.c b/kernel/power/process.c index 0beb51e1dec9..77274c9ba2f1 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static int try_to_freeze_tasks(bool sig_only) +static int try_to_freeze_tasks(bool user_only) { struct task_struct *g, *p; unsigned long end_time; @@ -37,14 +37,14 @@ static int try_to_freeze_tasks(bool sig_only) end_time = jiffies + TIMEOUT; - if (!sig_only) + if (!user_only) freeze_workqueues_begin(); while (true) { todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (p == current || !freeze_task(p, sig_only)) + if (p == current || !freeze_task(p)) continue; /* @@ -65,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only) } while_each_thread(g, p); read_unlock(&tasklist_lock); - if (!sig_only) { + if (!user_only) { wq_busy = freeze_workqueues_busy(); todo += wq_busy; } -- cgit v1.2.3 From 34b087e48367c252e343c2f8de65676a78af1e4a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Nov 2011 09:28:17 -0800 Subject: freezer: kill unused set_freezable_with_signal() There's no in-kernel user of set_freezable_with_signal() left. Mixing TIF_SIGPENDING with kernel threads can lead to nasty corner cases as kernel threads never travel signal delivery path on their own. e.g. the current implementation is buggy in the cancelation path of __thaw_task(). It calls recalc_sigpending_and_wake() in an attempt to clear TIF_SIGPENDING but the function never clears it regardless of sigpending state. This means that signallable freezable kthreads may continue executing with !freezing() && stuck TIF_SIGPENDING, which can be troublesome. This patch removes set_freezable_with_signal() along with PF_FREEZER_NOSIG and recalc_sigpending*() calls in freezer. User tasks get TIF_SIGPENDING, kernel tasks get woken up and the spurious sigpending is dealt with in the usual signal delivery path. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- include/linux/freezer.h | 20 +------------------- include/linux/sched.h | 1 - kernel/freezer.c | 27 ++++++--------------------- kernel/kthread.c | 2 +- 4 files changed, 8 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a28842e588f4..a33550fc05c5 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -49,7 +49,7 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p); -extern bool __set_freezable(bool with_signal); +extern bool set_freezable(void); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); @@ -104,23 +104,6 @@ static inline int freezer_should_skip(struct task_struct *p) return !!(p->flags & PF_FREEZER_SKIP); } -/* - * Tell the freezer that the current task should be frozen by it - */ -static inline bool set_freezable(void) -{ - return __set_freezable(false); -} - -/* - * Tell the freezer that the current task should be frozen by it and that it - * should send a fake signal to the task to freeze it. - */ -static inline bool set_freezable_with_signal(void) -{ - return __set_freezable(true); -} - /* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally @@ -176,7 +159,6 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} -static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/sched.h b/include/linux/sched.h index d12bd03b688f..2f90470ad843 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1788,7 +1788,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/freezer.c b/kernel/freezer.c index 2589a61de44c..9815b8d1eed5 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -39,7 +39,7 @@ bool freezing_slow_path(struct task_struct *p) if (pm_nosig_freezing || cgroup_freezing(p)) return true; - if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG)) + if (pm_freezing && !(p->flags & PF_KTHREAD)) return true; return false; @@ -72,10 +72,6 @@ bool __refrigerator(bool check_kthr_stop) schedule(); } - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); /* We sent fake signal, clean it up */ - spin_unlock_irq(¤t->sighand->siglock); - pr_debug("%s left refrigerator\n", current->comm); /* @@ -120,7 +116,7 @@ bool freeze_task(struct task_struct *p) return false; } - if (!(p->flags & PF_FREEZER_NOSIG)) { + if (!(p->flags & PF_KTHREAD)) { fake_signal_wake_up(p); /* * fake_signal_wake_up() goes through p's scheduler @@ -145,28 +141,19 @@ void __thaw_task(struct task_struct *p) * be visible to @p as waking up implies wmb. Waking up inside * freezer_lock also prevents wakeups from leaking outside * refrigerator. - * - * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to - * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); - if (frozen(p)) { + if (frozen(p)) wake_up_process(p); - } else { - spin_lock(&p->sighand->siglock); - recalc_sigpending_and_wake(p); - spin_unlock(&p->sighand->siglock); - } spin_unlock_irqrestore(&freezer_lock, flags); } /** - * __set_freezable - make %current freezable - * @with_signal: do we want %TIF_SIGPENDING for notification too? + * set_freezable - make %current freezable * * Mark %current freezable and enter refrigerator if necessary. */ -bool __set_freezable(bool with_signal) +bool set_freezable(void) { might_sleep(); @@ -177,10 +164,8 @@ bool __set_freezable(bool with_signal) */ spin_lock_irq(&freezer_lock); current->flags &= ~PF_NOFREEZE; - if (with_signal) - current->flags &= ~PF_FREEZER_NOSIG; spin_unlock_irq(&freezer_lock); return try_to_freeze(); } -EXPORT_SYMBOL(__set_freezable); +EXPORT_SYMBOL(set_freezable); diff --git a/kernel/kthread.c b/kernel/kthread.c index 1c36deaae2f1..3d3de633702e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -282,7 +282,7 @@ int kthreadd(void *unused) set_cpus_allowed_ptr(tsk, cpu_all_mask); set_mems_allowed(node_states[N_HIGH_MEMORY]); - current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; + current->flags |= PF_NOFREEZE; for (;;) { set_current_state(TASK_INTERRUPTIBLE); -- cgit v1.2.3 From 24b7ead3fb0bae267c2ee50898eb4c13aedd1e9f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Nov 2011 09:28:17 -0800 Subject: freezer: fix wait_event_freezable/__thaw_task races wait_event_freezable() and friends stop the waiting if try_to_freeze() fails. This is not right, we can race with __thaw_task() and in this case - wait_event_freezable() returns the wrong ERESTARTSYS - wait_event_freezable_timeout() can return the positive value while condition == F Change the code to always check __retval/condition before return. Note: with or without this patch the timeout logic looks strange, probably we should recalc timeout if try_to_freeze() returns T. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- include/linux/freezer.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a33550fc05c5..09570ac22be6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -122,28 +122,30 @@ static inline int freezer_should_skip(struct task_struct *p) #define wait_event_freezable(wq, condition) \ ({ \ int __retval; \ - do { \ + for (;;) { \ __retval = wait_event_interruptible(wq, \ (condition) || freezing(current)); \ - if (__retval && !freezing(current)) \ + if (__retval || (condition)) \ break; \ - else if (!(condition)) \ - __retval = -ERESTARTSYS; \ - } while (try_to_freeze()); \ + try_to_freeze(); \ + } \ __retval; \ }) - #define wait_event_freezable_timeout(wq, condition, timeout) \ ({ \ long __retval = timeout; \ - do { \ + for (;;) { \ __retval = wait_event_interruptible_timeout(wq, \ (condition) || freezing(current), \ __retval); \ - } while (try_to_freeze()); \ + if (__retval <= 0 || (condition)) \ + break; \ + try_to_freeze(); \ + } \ __retval; \ }) + #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } -- cgit v1.2.3 From 6a76b7a9cc93dec6ae58d70f1257d234291908e0 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Mon, 21 Nov 2011 23:32:35 +0100 Subject: PM / Memory-hotplug: Avoid task freezing failures The lock_system_sleep() function is used in the memory hotplug code at several places in order to implement mutual exclusion with hibernation. However, this function tries to acquire the 'pm_mutex' lock using mutex_lock() and hence blocks in TASK_UNINTERRUPTIBLE state if it doesn't get the lock. This would lead to task freezing failures and hence hibernation failure as a consequence, even though the hibernation call path successfully acquired the lock. But it is to be noted that, since this task tries to acquire pm_mutex, if it blocks due to this, we are *100% sure* that this task is not going to run as long as hibernation sequence is in progress, since hibernation releases 'pm_mutex' only at the very end, when everything is done. And this means, this task is going to be anyway blocked for much more longer than what the freezer intends to achieve; which means, freezing and thawing doesn't really make any difference to this task! So, to fix freezing failures, we just ask the freezer to skip freezing this task, since it is already "frozen enough". But instead of calling freezer_do_not_count() and freezer_count() as it is, we use only the relevant parts of those functions, because restrictions such as 'the task should be a userspace one' etc., might not be relevant in this scenario. Signed-off-by: Srivatsa S. Bhat Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 57a692432f8a..1f7fff47cfac 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -380,12 +380,16 @@ static inline void unlock_system_sleep(void) {} static inline void lock_system_sleep(void) { + /* simplified freezer_do_not_count() */ + current->flags |= PF_FREEZER_SKIP; mutex_lock(&pm_mutex); } static inline void unlock_system_sleep(void) { mutex_unlock(&pm_mutex); + /* simplified freezer_count() */ + current->flags &= ~PF_FREEZER_SKIP; } #endif -- cgit v1.2.3 From 62c9ea6b120688d800b4d892eaf737c20a73e86b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 25 Nov 2011 00:44:55 +0100 Subject: Freezer: fix more fallout from the thaw_process rename Commit 944e192db53c "freezer: rename thaw_process() to __thaw_task() and simplify the implementation" did not create a !CONFIG_FREEZER version of __thaw_task(). Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 09570ac22be6..c1ee2833655e 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -149,6 +149,7 @@ static inline int freezer_should_skip(struct task_struct *p) #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } +static inline void __thaw_task(struct task_struct *t) {} static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } -- cgit v1.2.3 From 00dc9ad18d707f36b2fb4af98fd2cf0548d2b258 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:01:31 +0100 Subject: PM / Runtime: Use device PM QoS constraints (v2) Make the runtime PM core use device PM QoS constraints to check if it is allowed to suspend a given device, so that an error code is returned if the device's own PM QoS constraint is negative or one of its children has already been suspended for too long. If this is not the case, the maximum estimated time the device is allowed to be suspended, computed as the minimum of the device's PM QoS constraint and the PM QoS constraints of its children (reduced by the difference between the current time and their suspend times) is stored in a new device's PM field power.max_time_suspended_ns that can be used by the device's subsystem or PM domain to decide whether or not to put the device into lower-power (and presumably higher-latency) states later (if the constraint is 0, which means "no constraint", the power.max_time_suspended_ns is set to -1). Additionally, the time of execution of the subsystem-level .runtime_suspend() callback for the device is recorded in the new power.suspend_time field for later use by the device's subsystem or PM domain along with power.max_time_suspended_ns (it also is used by the core code when the device's parent is suspended). Introduce a new helper function, pm_runtime_update_max_time_suspended(), allowing subsystems and PM domains (or device drivers) to update the power.max_time_suspended_ns field, for example after changing the power state of a suspended device. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 24 ++++--- drivers/base/power/runtime.c | 148 +++++++++++++++++++++++++++++++++++++------ include/linux/pm.h | 2 + include/linux/pm_qos.h | 3 + include/linux/pm_runtime.h | 5 ++ 5 files changed, 154 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 86de6c50fc41..03f4bd069ca8 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -47,21 +47,29 @@ static DEFINE_MUTEX(dev_pm_qos_mtx); static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); /** - * dev_pm_qos_read_value - Get PM QoS constraint for a given device. + * __dev_pm_qos_read_value - Get PM QoS constraint for a given device. + * @dev: Device to get the PM QoS constraint value for. + * + * This routine must be called with dev->power.lock held. + */ +s32 __dev_pm_qos_read_value(struct device *dev) +{ + struct pm_qos_constraints *c = dev->power.constraints; + + return c ? pm_qos_read_value(c) : 0; +} + +/** + * dev_pm_qos_read_value - Get PM QoS constraint for a given device (locked). * @dev: Device to get the PM QoS constraint value for. */ s32 dev_pm_qos_read_value(struct device *dev) { - struct pm_qos_constraints *c; unsigned long flags; - s32 ret = 0; + s32 ret; spin_lock_irqsave(&dev->power.lock, flags); - - c = dev->power.constraints; - if (c) - ret = pm_qos_read_value(c); - + ret = __dev_pm_qos_read_value(dev); spin_unlock_irqrestore(&dev->power.lock, flags); return ret; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8c78443bca8f..068f7ed1f009 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -279,6 +279,47 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) return retval != -EACCES ? retval : -EIO; } +struct rpm_qos_data { + ktime_t time_now; + s64 constraint_ns; +}; + +/** + * rpm_update_qos_constraint - Update a given PM QoS constraint data. + * @dev: Device whose timing data to use. + * @data: PM QoS constraint data to update. + * + * Use the suspend timing data of @dev to update PM QoS constraint data pointed + * to by @data. + */ +static int rpm_update_qos_constraint(struct device *dev, void *data) +{ + struct rpm_qos_data *qos = data; + unsigned long flags; + s64 delta_ns; + int ret = 0; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (dev->power.max_time_suspended_ns < 0) + goto out; + + delta_ns = dev->power.max_time_suspended_ns - + ktime_to_ns(ktime_sub(qos->time_now, dev->power.suspend_time)); + if (delta_ns <= 0) { + ret = -EBUSY; + goto out; + } + + if (qos->constraint_ns > delta_ns || qos->constraint_ns == 0) + qos->constraint_ns = delta_ns; + + out: + spin_unlock_irqrestore(&dev->power.lock, flags); + + return ret; +} + /** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. @@ -305,6 +346,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) { int (*callback)(struct device *); struct device *parent = NULL; + struct rpm_qos_data qos; int retval; trace_rpm_suspend(dev, rpmflags); @@ -400,8 +442,38 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } + qos.constraint_ns = __dev_pm_qos_read_value(dev); + if (qos.constraint_ns < 0) { + /* Negative constraint means "never suspend". */ + retval = -EPERM; + goto out; + } + qos.constraint_ns *= NSEC_PER_USEC; + qos.time_now = ktime_get(); + __update_runtime_status(dev, RPM_SUSPENDING); + if (!dev->power.ignore_children) { + if (dev->power.irq_safe) + spin_unlock(&dev->power.lock); + else + spin_unlock_irq(&dev->power.lock); + + retval = device_for_each_child(dev, &qos, + rpm_update_qos_constraint); + + if (dev->power.irq_safe) + spin_lock(&dev->power.lock); + else + spin_lock_irq(&dev->power.lock); + + if (retval) + goto fail; + } + + dev->power.suspend_time = qos.time_now; + dev->power.max_time_suspended_ns = qos.constraint_ns ? : -1; + if (dev->pm_domain) callback = dev->pm_domain->ops.runtime_suspend; else if (dev->type && dev->type->pm) @@ -414,27 +486,9 @@ static int rpm_suspend(struct device *dev, int rpmflags) callback = NULL; retval = rpm_callback(callback, dev); - if (retval) { - __update_runtime_status(dev, RPM_ACTIVE); - dev->power.deferred_resume = false; - if (retval == -EAGAIN || retval == -EBUSY) { - dev->power.runtime_error = 0; + if (retval) + goto fail; - /* - * If the callback routine failed an autosuspend, and - * if the last_busy time has been updated so that there - * is a new autosuspend expiration time, automatically - * reschedule another autosuspend. - */ - if ((rpmflags & RPM_AUTO) && - pm_runtime_autosuspend_expiration(dev) != 0) - goto repeat; - } else { - pm_runtime_cancel_pending(dev); - } - wake_up_all(&dev->power.wait_queue); - goto out; - } no_callback: __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); @@ -466,6 +520,29 @@ static int rpm_suspend(struct device *dev, int rpmflags) trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; + + fail: + __update_runtime_status(dev, RPM_ACTIVE); + dev->power.suspend_time = ktime_set(0, 0); + dev->power.max_time_suspended_ns = -1; + dev->power.deferred_resume = false; + if (retval == -EAGAIN || retval == -EBUSY) { + dev->power.runtime_error = 0; + + /* + * If the callback routine failed an autosuspend, and + * if the last_busy time has been updated so that there + * is a new autosuspend expiration time, automatically + * reschedule another autosuspend. + */ + if ((rpmflags & RPM_AUTO) && + pm_runtime_autosuspend_expiration(dev) != 0) + goto repeat; + } else { + pm_runtime_cancel_pending(dev); + } + wake_up_all(&dev->power.wait_queue); + goto out; } /** @@ -620,6 +697,9 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.no_callbacks) goto no_callback; /* Assume success. */ + dev->power.suspend_time = ktime_set(0, 0); + dev->power.max_time_suspended_ns = -1; + __update_runtime_status(dev, RPM_RESUMING); if (dev->pm_domain) @@ -1279,6 +1359,9 @@ void pm_runtime_init(struct device *dev) setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, (unsigned long)dev); + dev->power.suspend_time = ktime_set(0, 0); + dev->power.max_time_suspended_ns = -1; + init_waitqueue_head(&dev->power.wait_queue); } @@ -1296,3 +1379,28 @@ void pm_runtime_remove(struct device *dev) if (dev->power.irq_safe && dev->parent) pm_runtime_put_sync(dev->parent); } + +/** + * pm_runtime_update_max_time_suspended - Update device's suspend time data. + * @dev: Device to handle. + * @delta_ns: Value to subtract from the device's max_time_suspended_ns field. + * + * Update the device's power.max_time_suspended_ns field by subtracting + * @delta_ns from it. The resulting value of power.max_time_suspended_ns is + * never negative. + */ +void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (delta_ns > 0 && dev->power.max_time_suspended_ns > 0) { + if (dev->power.max_time_suspended_ns > delta_ns) + dev->power.max_time_suspended_ns -= delta_ns; + else + dev->power.max_time_suspended_ns = 0; + } + + spin_unlock_irqrestore(&dev->power.lock, flags); +} diff --git a/include/linux/pm.h b/include/linux/pm.h index 3f3ed83a9aa5..a7676efa6831 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -521,6 +521,8 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; + ktime_t suspend_time; + s64 max_time_suspended_ns; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ struct pm_qos_constraints *constraints; diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 83b0ea302a80..775a3236343d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -78,6 +78,7 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); s32 pm_qos_read_value(struct pm_qos_constraints *c); +s32 __dev_pm_qos_read_value(struct device *dev); s32 dev_pm_qos_read_value(struct device *dev); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, s32 value); @@ -119,6 +120,8 @@ static inline int pm_qos_request_active(struct pm_qos_request *req) static inline s32 pm_qos_read_value(struct pm_qos_constraints *c) { return 0; } +static inline s32 __dev_pm_qos_read_value(struct device *dev) + { return 0; } static inline s32 dev_pm_qos_read_value(struct device *dev) { return 0; } static inline int dev_pm_qos_add_request(struct device *dev, diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d3085e72a0ee..609daae7a014 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -45,6 +45,8 @@ extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); +extern void pm_runtime_update_max_time_suspended(struct device *dev, + s64 delta_ns); static inline bool pm_children_suspended(struct device *dev) { @@ -148,6 +150,9 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } +static inline void pm_runtime_update_max_time_suspended(struct device *dev, + s64 delta_ns) {} + #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) -- cgit v1.2.3 From d5e4cbfe2049fca375cb19c4bc0cf676e8b4a88a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:36 +0100 Subject: PM / Domains: Make it possible to use per-device domain callbacks The current generic PM domains code requires that the same .stop(), .start() and .active_wakeup() device callback routines be used for all devices in the given domain, which is inflexible and may not cover some specific use cases. For this reason, make it possible to use device specific .start()/.stop() and .active_wakeup() callback routines by adding corresponding callback pointers to struct generic_pm_domain_data. Add a new helper routine, pm_genpd_register_callbacks(), that can be used to populate the new per-device callback pointers. Modify the shmobile's power domains code to allow drivers to add their own code to be run during the device stop and start operations with the help of the new callback pointers. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- arch/arm/mach-shmobile/pm-sh7372.c | 40 +++++++++- drivers/base/power/domain.c | 152 ++++++++++++++++++++++++++++--------- include/linux/pm_domain.h | 27 ++++++- 3 files changed, 175 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 34bbcbfb1706..6777bb1be059 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -156,7 +156,10 @@ static void sh7372_a4r_suspend(void) static bool pd_active_wakeup(struct device *dev) { - return true; + bool (*active_wakeup)(struct device *dev); + + active_wakeup = dev_gpd_data(dev)->ops.active_wakeup; + return active_wakeup ? active_wakeup(dev) : true; } static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain) @@ -168,15 +171,44 @@ struct dev_power_governor sh7372_always_on_gov = { .power_down_ok = sh7372_power_down_forbidden, }; +static int sh7372_stop_dev(struct device *dev) +{ + int (*stop)(struct device *dev); + + stop = dev_gpd_data(dev)->ops.stop; + if (stop) { + int ret = stop(dev); + if (ret) + return ret; + } + return pm_clk_suspend(dev); +} + +static int sh7372_start_dev(struct device *dev) +{ + int (*start)(struct device *dev); + int ret; + + ret = pm_clk_resume(dev); + if (ret) + return ret; + + start = dev_gpd_data(dev)->ops.start; + if (start) + ret = start(dev); + + return ret; +} + void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd) { struct generic_pm_domain *genpd = &sh7372_pd->genpd; pm_genpd_init(genpd, sh7372_pd->gov, false); - genpd->stop_device = pm_clk_suspend; - genpd->start_device = pm_clk_resume; + genpd->dev_ops.stop = sh7372_stop_dev; + genpd->dev_ops.start = sh7372_start_dev; + genpd->dev_ops.active_wakeup = pd_active_wakeup; genpd->dev_irq_safe = true; - genpd->active_wakeup = pd_active_wakeup; genpd->power_off = pd_power_down; genpd->power_on = pd_power_up; __pd_power_up(sh7372_pd, false); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 6790cf7eba5a..94afaa2686a6 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -15,6 +15,23 @@ #include #include #include +#include + +#define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \ +({ \ + type (*__routine)(struct device *__d); \ + type __ret = (type)0; \ + \ + __routine = genpd->dev_ops.callback; \ + if (__routine) { \ + __ret = __routine(dev); \ + } else { \ + __routine = dev_gpd_data(dev)->ops.callback; \ + if (__routine) \ + __ret = __routine(dev); \ + } \ + __ret; \ +}) static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); @@ -29,6 +46,16 @@ static struct generic_pm_domain *dev_to_genpd(struct device *dev) return pd_to_genpd(dev->pm_domain); } +static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, stop, dev); +} + +static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, start, dev); +} + static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) { bool ret = false; @@ -199,13 +226,9 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, mutex_unlock(&genpd->lock); if (drv && drv->pm && drv->pm->runtime_suspend) { - if (genpd->start_device) - genpd->start_device(dev); - + genpd_start_dev(genpd, dev); ret = drv->pm->runtime_suspend(dev); - - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); } mutex_lock(&genpd->lock); @@ -235,13 +258,9 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, mutex_unlock(&genpd->lock); if (drv && drv->pm && drv->pm->runtime_resume) { - if (genpd->start_device) - genpd->start_device(dev); - + genpd_start_dev(genpd, dev); drv->pm->runtime_resume(dev); - - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); } mutex_lock(&genpd->lock); @@ -413,6 +432,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -422,11 +442,9 @@ static int pm_genpd_runtime_suspend(struct device *dev) might_sleep_if(!genpd->dev_irq_safe); - if (genpd->stop_device) { - int ret = genpd->stop_device(dev); - if (ret) - return ret; - } + ret = genpd_stop_dev(genpd, dev); + if (ret) + return ret; /* * If power.irq_safe is set, this routine will be run with interrupts @@ -502,8 +520,7 @@ static int pm_genpd_runtime_resume(struct device *dev) mutex_unlock(&genpd->lock); out: - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return 0; } @@ -534,6 +551,12 @@ static inline void genpd_power_off_work_fn(struct work_struct *work) {} #ifdef CONFIG_PM_SLEEP +static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, + struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); +} + /** * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. @@ -590,7 +613,7 @@ static bool resume_needed(struct device *dev, struct generic_pm_domain *genpd) if (!device_can_wakeup(dev)) return false; - active_wakeup = genpd->active_wakeup && genpd->active_wakeup(dev); + active_wakeup = genpd_dev_active_wakeup(genpd, dev); return device_may_wakeup(dev) ? active_wakeup : !active_wakeup; } @@ -646,7 +669,7 @@ static int pm_genpd_prepare(struct device *dev) /* * The PM domain must be in the GPD_STATE_ACTIVE state at this point, * so pm_genpd_poweron() will return immediately, but if the device - * is suspended (e.g. it's been stopped by .stop_device()), we need + * is suspended (e.g. it's been stopped by genpd_stop_dev()), we need * to make it operational. */ pm_runtime_resume(dev); @@ -714,12 +737,10 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (ret) return ret; - if (dev->power.wakeup_path - && genpd->active_wakeup && genpd->active_wakeup(dev)) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); /* * Since all of the "noirq" callbacks are executed sequentially, it is @@ -761,8 +782,7 @@ static int pm_genpd_resume_noirq(struct device *dev) */ pm_genpd_poweron(genpd); genpd->suspended_count--; - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return pm_generic_resume_noirq(dev); } @@ -836,8 +856,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (ret) return ret; - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); return 0; } @@ -864,8 +883,7 @@ static int pm_genpd_thaw_noirq(struct device *dev) if (genpd->suspend_power_off) return 0; - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return pm_generic_thaw_noirq(dev); } @@ -938,12 +956,10 @@ static int pm_genpd_dev_poweroff_noirq(struct device *dev) if (ret) return ret; - if (dev->power.wakeup_path - && genpd->active_wakeup && genpd->active_wakeup(dev)) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); /* * Since all of the "noirq" callbacks are executed sequentially, it is @@ -993,8 +1009,7 @@ static int pm_genpd_restore_noirq(struct device *dev) pm_genpd_poweron(genpd); genpd->suspended_count--; - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return pm_generic_restore_noirq(dev); } @@ -1279,6 +1294,69 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, return ret; } +/** + * pm_genpd_add_callbacks - Add PM domain callbacks to a given device. + * @dev: Device to add the callbacks to. + * @ops: Set of callbacks to add. + */ +int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) +{ + struct pm_domain_data *pdd; + int ret = 0; + + if (!(dev && dev->power.subsys_data && ops)) + return -EINVAL; + + pm_runtime_disable(dev); + device_pm_lock(); + + pdd = dev->power.subsys_data->domain_data; + if (pdd) { + struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); + + gpd_data->ops = *ops; + } else { + ret = -EINVAL; + } + + device_pm_unlock(); + pm_runtime_enable(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); + +/** + * pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. + * @dev: Device to remove the callbacks from. + */ +int pm_genpd_remove_callbacks(struct device *dev) +{ + struct pm_domain_data *pdd; + int ret = 0; + + if (!(dev && dev->power.subsys_data)) + return -EINVAL; + + pm_runtime_disable(dev); + device_pm_lock(); + + pdd = dev->power.subsys_data->domain_data; + if (pdd) { + struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); + + gpd_data->ops = (struct gpd_dev_ops){ 0 }; + } else { + ret = -EINVAL; + } + + device_pm_unlock(); + pm_runtime_enable(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); + /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 65633e5a2bc0..8949d2d202ae 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -23,6 +23,12 @@ struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); }; +struct gpd_dev_ops { + int (*start)(struct device *dev); + int (*stop)(struct device *dev); + bool (*active_wakeup)(struct device *dev); +}; + struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ @@ -45,9 +51,7 @@ struct generic_pm_domain { bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); - int (*start_device)(struct device *dev); - int (*stop_device)(struct device *dev); - bool (*active_wakeup)(struct device *dev); + struct gpd_dev_ops dev_ops; }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) @@ -64,6 +68,7 @@ struct gpd_link { struct generic_pm_domain_data { struct pm_domain_data base; + struct gpd_dev_ops ops; bool need_restore; }; @@ -73,6 +78,11 @@ static inline struct generic_pm_domain_data *to_gpd_data(struct pm_domain_data * } #ifdef CONFIG_PM_GENERIC_DOMAINS +static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) +{ + return to_gpd_data(dev->power.subsys_data->domain_data); +} + extern int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, @@ -81,6 +91,8 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); +extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops); +extern int pm_genpd_remove_callbacks(struct device *dev); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); @@ -105,6 +117,15 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } +static inline int pm_genpd_add_callbacks(struct device *dev, + struct gpd_dev_ops *ops) +{ + return -ENOSYS; +} +static inline int pm_genpd_remove_callbacks(struct device *dev) +{ + return -ENOSYS; +} static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) {} static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) -- cgit v1.2.3 From ecf00475f229fcf06362412ad2d15a3267e354a1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:44 +0100 Subject: PM / Domains: Introduce "save/restore state" device callbacks The current PM domains code uses device drivers' .runtime_suspend() and .runtime_resume() callbacks as the "save device state" and "restore device state" operations, which may not be appropriate in general, because it forces drivers to assume that they always will be used with generic PM domains. However, in theory, the same hardware may be used in devices that don't belong to any PM domain, in which case it would be necessary to add "fake" PM domains to satisfy the above assumption. It also may be located in a PM domain that's not handled with the help of the generic code. To allow device drivers that may be used along with the generic PM domains code of more flexibility, introduce new device callbacks, .save_state() and .restore_state(), that can be supplied by the drivers in addition to their "standard" runtime PM callbacks. This will allow the drivers to be designed to work with generic PM domains as well as without them. For backwards compatibility, introduce default .save_state() and .restore_state() callback routines for PM domains that will execute a device driver's .runtime_suspend() and .runtime_resume() callbacks, respectively, for the given device if the driver doesn't provide its own implementations of .save_state() and .restore_state(). Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 68 +++++++++++++++++++++++++++++++++++++-------- include/linux/pm_domain.h | 2 ++ 2 files changed, 58 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 94afaa2686a6..3c9451b10427 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -56,6 +56,16 @@ static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) return GENPD_DEV_CALLBACK(genpd, int, start, dev); } +static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, save_state, dev); +} + +static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev); +} + static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) { bool ret = false; @@ -217,7 +227,6 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, { struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; - struct device_driver *drv = dev->driver; int ret = 0; if (gpd_data->need_restore) @@ -225,11 +234,9 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, mutex_unlock(&genpd->lock); - if (drv && drv->pm && drv->pm->runtime_suspend) { - genpd_start_dev(genpd, dev); - ret = drv->pm->runtime_suspend(dev); - genpd_stop_dev(genpd, dev); - } + genpd_start_dev(genpd, dev); + ret = genpd_save_dev(genpd, dev); + genpd_stop_dev(genpd, dev); mutex_lock(&genpd->lock); @@ -250,18 +257,15 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, { struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; - struct device_driver *drv = dev->driver; if (!gpd_data->need_restore) return; mutex_unlock(&genpd->lock); - if (drv && drv->pm && drv->pm->runtime_resume) { - genpd_start_dev(genpd, dev); - drv->pm->runtime_resume(dev); - genpd_stop_dev(genpd, dev); - } + genpd_start_dev(genpd, dev); + genpd_restore_dev(genpd, dev); + genpd_stop_dev(genpd, dev); mutex_lock(&genpd->lock); @@ -1357,6 +1361,44 @@ int pm_genpd_remove_callbacks(struct device *dev) } EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); +/** + * pm_genpd_default_save_state - Default "save device state" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_save_state(struct device *dev) +{ + int (*cb)(struct device *__dev); + struct device_driver *drv = dev->driver; + + cb = dev_gpd_data(dev)->ops.save_state; + if (cb) + return cb(dev); + + if (drv && drv->pm && drv->pm->runtime_suspend) + return drv->pm->runtime_suspend(dev); + + return 0; +} + +/** + * pm_genpd_default_restore_state - Default PM domians "restore device state". + * @dev: Device to handle. + */ +static int pm_genpd_default_restore_state(struct device *dev) +{ + int (*cb)(struct device *__dev); + struct device_driver *drv = dev->driver; + + cb = dev_gpd_data(dev)->ops.restore_state; + if (cb) + return cb(dev); + + if (drv && drv->pm && drv->pm->runtime_resume) + return drv->pm->runtime_resume(dev); + + return 0; +} + /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. @@ -1400,6 +1442,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; genpd->domain.ops.restore = pm_genpd_restore; genpd->domain.ops.complete = pm_genpd_complete; + genpd->dev_ops.save_state = pm_genpd_default_save_state; + genpd->dev_ops.restore_state = pm_genpd_default_restore_state; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 8949d2d202ae..731080dad250 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -26,6 +26,8 @@ struct dev_power_governor { struct gpd_dev_ops { int (*start)(struct device *dev); int (*stop)(struct device *dev); + int (*save_state)(struct device *dev); + int (*restore_state)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From d23b9b00cdde5c93b914a172cecd57d5625fcd04 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:51 +0100 Subject: PM / Domains: Rework system suspend callback routines (v2) The current generic PM domains code attempts to use the generic system suspend operations along with the domains' device stop/start routines, which requires device drivers to assume that their system suspend/resume (and hibernation/restore) callbacks will always be used with generic PM domains. However, in theory, the same hardware may be used in devices that don't belong to any PM domain, in which case it would be necessary to add "fake" PM domains to satisfy the above assumption. Also, the domain the hardware belongs to may not be handled with the help of the generic code. To allow device drivers that may be used along with the generic PM domains code of more flexibility, add new device callbacks, .suspend(), .suspend_late(), .resume_early(), .resume(), .freeze(), .freeze_late(), .thaw_early(), and .thaw(), that can be supplied by the drivers in addition to their "standard" system suspend and hibernation callbacks. These new callbacks, if defined, will be used by the generic PM domains code for the handling of system suspend and hibernation instead of the "standard" ones. This will allow drivers to be designed to work with generic PM domains as well as without them. For backwards compatibility, introduce default implementations of the new callbacks for PM domains that will execute pm_generic_suspend(), pm_generic_suspend_noirq(), pm_generic_resume_noirq(), pm_generic_resume(), pm_generic_freeze(), pm_generic_freeze_noirq(), pm_generic_thaw_noirq(), and pm_generic_thaw(), respectively, for the given device if its driver doesn't define those callbacks. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 249 ++++++++++++++++++++++++++------------------ include/linux/pm_domain.h | 8 ++ 2 files changed, 158 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3c9451b10427..9a77080cb799 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -561,6 +561,46 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); } +static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, suspend, dev); +} + +static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev); +} + +static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev); +} + +static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, resume, dev); +} + +static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, freeze, dev); +} + +static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev); +} + +static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev); +} + +static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, thaw, dev); +} + /** * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. @@ -712,7 +752,7 @@ static int pm_genpd_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev); + return genpd->suspend_power_off ? 0 : genpd_suspend_dev(genpd, dev); } /** @@ -737,7 +777,7 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (genpd->suspend_power_off) return 0; - ret = pm_generic_suspend_noirq(dev); + ret = genpd_suspend_late(genpd, dev); if (ret) return ret; @@ -788,7 +828,7 @@ static int pm_genpd_resume_noirq(struct device *dev) genpd->suspended_count--; genpd_start_dev(genpd, dev); - return pm_generic_resume_noirq(dev); + return genpd_resume_early(genpd, dev); } /** @@ -809,7 +849,7 @@ static int pm_genpd_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_resume(dev); + return genpd->suspend_power_off ? 0 : genpd_resume_dev(genpd, dev); } /** @@ -830,7 +870,7 @@ static int pm_genpd_freeze(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev); + return genpd->suspend_power_off ? 0 : genpd_freeze_dev(genpd, dev); } /** @@ -856,7 +896,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (genpd->suspend_power_off) return 0; - ret = pm_generic_freeze_noirq(dev); + ret = genpd_freeze_late(genpd, dev); if (ret) return ret; @@ -889,7 +929,7 @@ static int pm_genpd_thaw_noirq(struct device *dev) genpd_start_dev(genpd, dev); - return pm_generic_thaw_noirq(dev); + return genpd_thaw_early(genpd, dev); } /** @@ -910,70 +950,7 @@ static int pm_genpd_thaw(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev); -} - -/** - * pm_genpd_dev_poweroff - Power off a device belonging to an I/O PM domain. - * @dev: Device to suspend. - * - * Power off a device under the assumption that its pm_domain field points to - * the domain member of an object of type struct generic_pm_domain representing - * a PM domain consisting of I/O devices. - */ -static int pm_genpd_dev_poweroff(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_poweroff(dev); -} - -/** - * pm_genpd_dev_poweroff_noirq - Late power off of a device from a PM domain. - * @dev: Device to suspend. - * - * Carry out a late powering off of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a PM domain consisting of I/O devices. - */ -static int pm_genpd_dev_poweroff_noirq(struct device *dev) -{ - struct generic_pm_domain *genpd; - int ret; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - if (genpd->suspend_power_off) - return 0; - - ret = pm_generic_poweroff_noirq(dev); - if (ret) - return ret; - - if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) - return 0; - - genpd_stop_dev(genpd, dev); - - /* - * Since all of the "noirq" callbacks are executed sequentially, it is - * guaranteed that this function will never run twice in parallel for - * the same PM domain, so it is not necessary to use locking here. - */ - genpd->suspended_count++; - pm_genpd_sync_poweroff(genpd); - - return 0; + return genpd->suspend_power_off ? 0 : genpd_thaw_dev(genpd, dev); } /** @@ -1015,28 +992,7 @@ static int pm_genpd_restore_noirq(struct device *dev) genpd->suspended_count--; genpd_start_dev(genpd, dev); - return pm_generic_restore_noirq(dev); -} - -/** - * pm_genpd_restore - Restore a device belonging to an I/O power domain. - * @dev: Device to resume. - * - * Restore a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_restore(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_restore(dev); + return genpd_resume_early(genpd, dev); } /** @@ -1086,10 +1042,7 @@ static void pm_genpd_complete(struct device *dev) #define pm_genpd_freeze_noirq NULL #define pm_genpd_thaw_noirq NULL #define pm_genpd_thaw NULL -#define pm_genpd_dev_poweroff_noirq NULL -#define pm_genpd_dev_poweroff NULL #define pm_genpd_restore_noirq NULL -#define pm_genpd_restore NULL #define pm_genpd_complete NULL #endif /* CONFIG_PM_SLEEP */ @@ -1361,6 +1314,8 @@ int pm_genpd_remove_callbacks(struct device *dev) } EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); +/* Default device callbacks for generic PM domains. */ + /** * pm_genpd_default_save_state - Default "save device state" for PM domians. * @dev: Device to handle. @@ -1399,6 +1354,94 @@ static int pm_genpd_default_restore_state(struct device *dev) return 0; } +/** + * pm_genpd_default_suspend - Default "device suspend" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_suspend(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; + + return cb ? cb(dev) : pm_generic_suspend(dev); +} + +/** + * pm_genpd_default_suspend_late - Default "late device suspend" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_suspend_late(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; + + return cb ? cb(dev) : pm_generic_suspend_noirq(dev); +} + +/** + * pm_genpd_default_resume_early - Default "early device resume" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_resume_early(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; + + return cb ? cb(dev) : pm_generic_resume_noirq(dev); +} + +/** + * pm_genpd_default_resume - Default "device resume" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_resume(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; + + return cb ? cb(dev) : pm_generic_resume(dev); +} + +/** + * pm_genpd_default_freeze - Default "device freeze" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_freeze(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; + + return cb ? cb(dev) : pm_generic_freeze(dev); +} + +/** + * pm_genpd_default_freeze_late - Default "late device freeze" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_freeze_late(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; + + return cb ? cb(dev) : pm_generic_freeze_noirq(dev); +} + +/** + * pm_genpd_default_thaw_early - Default "early device thaw" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_thaw_early(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; + + return cb ? cb(dev) : pm_generic_thaw_noirq(dev); +} + +/** + * pm_genpd_default_thaw - Default "device thaw" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_thaw(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; + + return cb ? cb(dev) : pm_generic_thaw(dev); +} + /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. @@ -1437,13 +1480,21 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq; genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq; genpd->domain.ops.thaw = pm_genpd_thaw; - genpd->domain.ops.poweroff = pm_genpd_dev_poweroff; - genpd->domain.ops.poweroff_noirq = pm_genpd_dev_poweroff_noirq; + genpd->domain.ops.poweroff = pm_genpd_suspend; + genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; - genpd->domain.ops.restore = pm_genpd_restore; + genpd->domain.ops.restore = pm_genpd_resume; genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; + genpd->dev_ops.freeze = pm_genpd_default_suspend; + genpd->dev_ops.freeze_late = pm_genpd_default_suspend_late; + genpd->dev_ops.thaw_early = pm_genpd_default_resume_early; + genpd->dev_ops.thaw = pm_genpd_default_resume; + genpd->dev_ops.freeze = pm_genpd_default_freeze; + genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late; + genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early; + genpd->dev_ops.thaw = pm_genpd_default_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 731080dad250..10a197dce07e 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -28,6 +28,14 @@ struct gpd_dev_ops { int (*stop)(struct device *dev); int (*save_state)(struct device *dev); int (*restore_state)(struct device *dev); + int (*suspend)(struct device *dev); + int (*suspend_late)(struct device *dev); + int (*resume_early)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*freeze_late)(struct device *dev); + int (*thaw_early)(struct device *dev); + int (*thaw)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From b02c999ac325e977585abeb4caf6e0a2ee21e30b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:02:05 +0100 Subject: PM / Domains: Add device stop governor function (v4) Add a function deciding whether or not devices should be stopped in pm_genpd_runtime_suspend() depending on their PM QoS constraints and stop/start timing values. Make it possible to add information used by this function to device objects. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- arch/arm/mach-shmobile/pm-sh7372.c | 4 ++- drivers/base/power/Makefile | 2 +- drivers/base/power/domain.c | 33 +++++++++++++++---- drivers/base/power/domain_governor.c | 33 +++++++++++++++++++ include/linux/pm_domain.h | 63 +++++++++++++++++++++++++++++++----- 5 files changed, 118 insertions(+), 17 deletions(-) create mode 100644 drivers/base/power/domain_governor.c (limited to 'include') diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 6777bb1be059..adf1765e69c6 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -169,6 +169,7 @@ static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain) struct dev_power_governor sh7372_always_on_gov = { .power_down_ok = sh7372_power_down_forbidden, + .stop_ok = default_stop_ok, }; static int sh7372_stop_dev(struct device *dev) @@ -203,8 +204,9 @@ static int sh7372_start_dev(struct device *dev) void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd) { struct generic_pm_domain *genpd = &sh7372_pd->genpd; + struct dev_power_governor *gov = sh7372_pd->gov; - pm_genpd_init(genpd, sh7372_pd->gov, false); + pm_genpd_init(genpd, gov ? : &simple_qos_governor, false); genpd->dev_ops.stop = sh7372_stop_dev; genpd->dev_ops.start = sh7372_start_dev; genpd->dev_ops.active_wakeup = pd_active_wakeup; diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 81676dd17900..2e58ebb1f6c0 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_OPP) += opp.o -obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o +obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o domain_governor.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9a77080cb799..3af9f5a71ad5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -38,7 +38,7 @@ static DEFINE_MUTEX(gpd_list_lock); #ifdef CONFIG_PM -static struct generic_pm_domain *dev_to_genpd(struct device *dev) +struct generic_pm_domain *dev_to_genpd(struct device *dev) { if (IS_ERR_OR_NULL(dev->pm_domain)) return ERR_PTR(-EINVAL); @@ -436,6 +436,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; + bool (*stop_ok)(struct device *__dev); int ret; dev_dbg(dev, "%s()\n", __func__); @@ -446,10 +447,17 @@ static int pm_genpd_runtime_suspend(struct device *dev) might_sleep_if(!genpd->dev_irq_safe); + stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; + if (stop_ok && !stop_ok(dev)) + return -EBUSY; + ret = genpd_stop_dev(genpd, dev); if (ret) return ret; + pm_runtime_update_max_time_suspended(dev, + dev_gpd_data(dev)->td.start_latency_ns); + /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. @@ -1048,11 +1056,13 @@ static void pm_genpd_complete(struct device *dev) #endif /* CONFIG_PM_SLEEP */ /** - * pm_genpd_add_device - Add a device to an I/O PM domain. + * __pm_genpd_add_device - Add a device to an I/O PM domain. * @genpd: PM domain to add the device to. * @dev: Device to be added. + * @td: Set of PM QoS timing parameters to attach to the device. */ -int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) +int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, + struct gpd_timing_data *td) { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; @@ -1095,6 +1105,8 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) gpd_data->base.dev = dev; gpd_data->need_restore = false; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); + if (td) + gpd_data->td = *td; out: genpd_release_lock(genpd); @@ -1255,8 +1267,10 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, * pm_genpd_add_callbacks - Add PM domain callbacks to a given device. * @dev: Device to add the callbacks to. * @ops: Set of callbacks to add. + * @td: Timing data to add to the device along with the callbacks (optional). */ -int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) +int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops, + struct gpd_timing_data *td) { struct pm_domain_data *pdd; int ret = 0; @@ -1272,6 +1286,8 @@ int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); gpd_data->ops = *ops; + if (td) + gpd_data->td = *td; } else { ret = -EINVAL; } @@ -1284,10 +1300,11 @@ int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); /** - * pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. + * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. * @dev: Device to remove the callbacks from. + * @clear_td: If set, clear the device's timing data too. */ -int pm_genpd_remove_callbacks(struct device *dev) +int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) { struct pm_domain_data *pdd; int ret = 0; @@ -1303,6 +1320,8 @@ int pm_genpd_remove_callbacks(struct device *dev) struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); gpd_data->ops = (struct gpd_dev_ops){ 0 }; + if (clear_td) + gpd_data->td = (struct gpd_timing_data){ 0 }; } else { ret = -EINVAL; } @@ -1312,7 +1331,7 @@ int pm_genpd_remove_callbacks(struct device *dev) return ret; } -EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); +EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks); /* Default device callbacks for generic PM domains. */ diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c new file mode 100644 index 000000000000..97b21c10c496 --- /dev/null +++ b/drivers/base/power/domain_governor.c @@ -0,0 +1,33 @@ +/* + * drivers/base/power/domain_governor.c - Governors for device PM domains. + * + * Copyright (C) 2011 Rafael J. Wysocki , Renesas Electronics Corp. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include + +/** + * default_stop_ok - Default PM domain governor routine for stopping devices. + * @dev: Device to check. + */ +bool default_stop_ok(struct device *dev) +{ + struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + + dev_dbg(dev, "%s()\n", __func__); + + if (dev->power.max_time_suspended_ns < 0 || td->break_even_ns == 0) + return true; + + return td->stop_latency_ns + td->start_latency_ns < td->break_even_ns + && td->break_even_ns < dev->power.max_time_suspended_ns; +} + +struct dev_power_governor simple_qos_governor = { + .stop_ok = default_stop_ok, +}; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 10a197dce07e..f6745c213a57 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -21,6 +21,7 @@ enum gpd_status { struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); + bool (*stop_ok)(struct device *dev); }; struct gpd_dev_ops { @@ -76,9 +77,16 @@ struct gpd_link { struct list_head slave_node; }; +struct gpd_timing_data { + s64 stop_latency_ns; + s64 start_latency_ns; + s64 break_even_ns; +}; + struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_dev_ops ops; + struct gpd_timing_data td; bool need_restore; }; @@ -93,20 +101,48 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern int pm_genpd_add_device(struct generic_pm_domain *genpd, - struct device *dev); +extern struct dev_power_governor simple_qos_governor; + +extern struct generic_pm_domain *dev_to_genpd(struct device *dev); +extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev, + struct gpd_timing_data *td); + +static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev) +{ + return __pm_genpd_add_device(genpd, dev, NULL); +} + extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops); -extern int pm_genpd_remove_callbacks(struct device *dev); +extern int pm_genpd_add_callbacks(struct device *dev, + struct gpd_dev_ops *ops, + struct gpd_timing_data *td); +extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); + extern int pm_genpd_poweron(struct generic_pm_domain *genpd); + +extern bool default_stop_ok(struct device *dev); + #else + +static inline struct generic_pm_domain *dev_to_genpd(struct device *dev) +{ + return ERR_PTR(-ENOSYS); +} +static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev, + struct gpd_timing_data *td) +{ + return -ENOSYS; +} static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { @@ -128,22 +164,33 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, return -ENOSYS; } static inline int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops) + struct gpd_dev_ops *ops, + struct gpd_timing_data *td) { return -ENOSYS; } -static inline int pm_genpd_remove_callbacks(struct device *dev) +static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) { return -ENOSYS; } -static inline void pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off) {} +static inline void pm_genpd_init(struct generic_pm_domain *genpd, bool is_off) +{ +} static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } +static inline bool default_stop_ok(struct device *dev) +{ + return false; +} #endif +static inline int pm_genpd_remove_callbacks(struct device *dev) +{ + return __pm_genpd_remove_callbacks(dev, true); +} + #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); -- cgit v1.2.3 From 221e9b58380abdd6c05e11b4538597e2586ee141 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:02:10 +0100 Subject: PM / Domains: Add default power off governor function (v4) Add a function deciding whether or not a given PM domain should be powered off on the basis of the PM QoS constraints of devices belonging to it and their PM QoS timing data. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 12 ++++ drivers/base/power/domain_governor.c | 110 +++++++++++++++++++++++++++++++++++ include/linux/pm_domain.h | 7 +++ 3 files changed, 129 insertions(+) (limited to 'include') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3af9f5a71ad5..91896194e76b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -398,6 +398,17 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } genpd->status = GPD_STATE_POWER_OFF; + genpd->power_off_time = ktime_get(); + + /* Update PM QoS information for devices in the domain. */ + list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { + struct gpd_timing_data *td = &to_gpd_data(pdd)->td; + + pm_runtime_update_max_time_suspended(pdd->dev, + td->start_latency_ns + + td->restore_state_latency_ns + + genpd->power_on_latency_ns); + } list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); @@ -1487,6 +1498,7 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->resume_count = 0; genpd->device_count = 0; genpd->suspended_count = 0; + genpd->max_off_time_ns = -1; genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend; genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume; genpd->domain.ops.runtime_idle = pm_generic_runtime_idle; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 97b21c10c496..da78540e9b40 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,7 @@ #include #include #include +#include /** * default_stop_ok - Default PM domain governor routine for stopping devices. @@ -28,6 +29,115 @@ bool default_stop_ok(struct device *dev) && td->break_even_ns < dev->power.max_time_suspended_ns; } +/** + * default_power_down_ok - Default generic PM domain power off governor routine. + * @pd: PM domain to check. + * + * This routine must be executed under the PM domain's lock. + */ +static bool default_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct gpd_link *link; + struct pm_domain_data *pdd; + s64 min_dev_off_time_ns; + s64 off_on_time_ns; + ktime_t time_now = ktime_get(); + + off_on_time_ns = genpd->power_off_latency_ns + + genpd->power_on_latency_ns; + /* + * It doesn't make sense to remove power from the domain if saving + * the state of all devices in it and the power off/power on operations + * take too much time. + * + * All devices in this domain have been stopped already at this point. + */ + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + if (pdd->dev->driver) + off_on_time_ns += + to_gpd_data(pdd)->td.save_state_latency_ns; + } + + /* + * Check if subdomains can be off for enough time. + * + * All subdomains have been powered off already at this point. + */ + list_for_each_entry(link, &genpd->master_links, master_node) { + struct generic_pm_domain *sd = link->slave; + s64 sd_max_off_ns = sd->max_off_time_ns; + + if (sd_max_off_ns < 0) + continue; + + sd_max_off_ns -= ktime_to_ns(ktime_sub(time_now, + sd->power_off_time)); + /* + * Check if the subdomain is allowed to be off long enough for + * the current domain to turn off and on (that's how much time + * it will have to wait worst case). + */ + if (sd_max_off_ns <= off_on_time_ns) + return false; + } + + /* + * Check if the devices in the domain can be off enough time. + */ + min_dev_off_time_ns = -1; + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + struct gpd_timing_data *td; + struct device *dev = pdd->dev; + s64 dev_off_time_ns; + + if (!dev->driver || dev->power.max_time_suspended_ns < 0) + continue; + + td = &to_gpd_data(pdd)->td; + dev_off_time_ns = dev->power.max_time_suspended_ns - + (td->start_latency_ns + td->restore_state_latency_ns + + ktime_to_ns(ktime_sub(time_now, + dev->power.suspend_time))); + if (dev_off_time_ns <= off_on_time_ns) + return false; + + if (min_dev_off_time_ns > dev_off_time_ns + || min_dev_off_time_ns < 0) + min_dev_off_time_ns = dev_off_time_ns; + } + + if (min_dev_off_time_ns < 0) { + /* + * There are no latency constraints, so the domain can spend + * arbitrary time in the "off" state. + */ + genpd->max_off_time_ns = -1; + return true; + } + + /* + * The difference between the computed minimum delta and the time needed + * to turn the domain on is the maximum theoretical time this domain can + * spend in the "off" state. + */ + min_dev_off_time_ns -= genpd->power_on_latency_ns; + + /* + * If the difference between the computed minimum delta and the time + * needed to turn the domain off and back on on is smaller than the + * domain's power break even time, removing power from the domain is not + * worth it. + */ + if (genpd->break_even_ns > + min_dev_off_time_ns - genpd->power_off_latency_ns) + return false; + + genpd->max_off_time_ns = min_dev_off_time_ns; + return true; +} + struct dev_power_governor simple_qos_governor = { .stop_ok = default_stop_ok, + .power_down_ok = default_power_down_ok, }; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f6745c213a57..cc1a2450ff7b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,8 +61,13 @@ struct generic_pm_domain { bool suspend_power_off; /* Power status before system suspend */ bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); + s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); + s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; + s64 break_even_ns; /* Power break even for the entire domain. */ + s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ + ktime_t power_off_time; }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) @@ -80,6 +85,8 @@ struct gpd_link { struct gpd_timing_data { s64 stop_latency_ns; s64 start_latency_ns; + s64 save_state_latency_ns; + s64 restore_state_latency_ns; s64 break_even_ns; }; -- cgit v1.2.3 From 4f042cdad40e1566a53b7ae85e72b6945a4b0fde Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 1 Dec 2011 00:05:31 +0100 Subject: PM / Domains: fix compilation failure for CONFIG_PM_GENERIC_DOMAINS unset Fix the following compalitaion breakage: In file included from linux/drivers/sh/pm_runtime.c:15: linux/include/linux/pm_domain.h: In function 'dev_to_genpd': linux/include/linux/pm_domain.h:142: error: implicit declaration of function 'ERR_PTR' linux/include/linux/pm_domain.h:142: warning: return makes pointer from integer without a cast In file included from linux/include/linux/sh_clk.h:10, from linux/drivers/sh/pm_runtime.c:19: linux/include/linux/err.h: At top level: linux/include/linux/err.h:22: error: conflicting types for 'ERR_PTR' linux/include/linux/pm_domain.h:142: note: previous implicit declaration of 'ERR_PTR' was here make[3]: *** [drivers/sh/pm_runtime.o] Error 1 Reported-by: Nobuhiro Iwamatsu Signed-off-by: Guennadi Liakhovetski Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index cc1a2450ff7b..fbb81bc5065a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -10,6 +10,7 @@ #define _LINUX_PM_DOMAIN_H #include +#include enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ -- cgit v1.2.3 From d310310cbff18ec385c6ab4d58f33b100192a96a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Dec 2011 22:44:39 +0100 Subject: Freezer / sunrpc / NFS: don't allow TASK_KILLABLE sleeps to block the freezer Allow the freezer to skip wait_on_bit_killable sleeps in the sunrpc layer. This should allow suspend and hibernate events to proceed, even when there are RPC's pending on the wire. Also, wrap the TASK_KILLABLE sleeps in NFS layer in freezer_do_not_count and freezer_count calls. This allows the freezer to skip tasks that are sleeping while looping on EJUKEBOX or NFS4ERR_DELAY sorts of errors. Signed-off-by: Jeff Layton Signed-off-by: Rafael J. Wysocki --- fs/nfs/inode.c | 3 ++- fs/nfs/nfs3proc.c | 3 ++- fs/nfs/nfs4proc.c | 5 +++-- fs/nfs/proc.c | 3 ++- include/linux/freezer.h | 28 ++++++++++++++++++++++++++++ net/sunrpc/sched.c | 3 ++- 6 files changed, 39 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 50a15fa8cf98..bf3a57bbbfcf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -77,7 +78,7 @@ int nfs_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - schedule(); + freezable_schedule(); return 0; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d4bc9ed91748..91943953a370 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "iostat.h" #include "internal.h" @@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX && res != -EKEYEXPIRED) break; - schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be2bbac13817..b28bb19b04f0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -241,7 +242,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - schedule_timeout_killable(*timeout); + freezable_schedule_timeout_killable(*timeout); if (fatal_signal_pending(current)) res = -ERESTARTSYS; *timeout <<= 1; @@ -3950,7 +3951,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - schedule_timeout_killable(timeout); + freezable_schedule_timeout_killable(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f48125da198a..0c672588fe5a 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -59,7 +60,7 @@ nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EKEYEXPIRED) break; - schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index c1ee2833655e..30f06c220467 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -104,6 +104,29 @@ static inline int freezer_should_skip(struct task_struct *p) return !!(p->flags & PF_FREEZER_SKIP); } +/* + * These macros are intended to be used whenever you want allow a task that's + * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note + * that neither return any clear indication of whether a freeze event happened + * while in this function. + */ + +/* Like schedule(), but should not block the freezer. */ +#define freezable_schedule() \ +({ \ + freezer_do_not_count(); \ + schedule(); \ + freezer_count(); \ +}) + +/* Like schedule_timeout_killable(), but should not block the freezer. */ +#define freezable_schedule_timeout_killable(timeout) \ +({ \ + freezer_do_not_count(); \ + schedule_timeout_killable(timeout); \ + freezer_count(); \ +}) + /* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally @@ -163,6 +186,11 @@ static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +#define freezable_schedule() schedule() + +#define freezable_schedule_timeout_killable(timeout) \ + schedule_timeout_killable(timeout) + #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d12ffa545811..5317b9341b53 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -231,7 +232,7 @@ static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - schedule(); + freezable_schedule(); return 0; } -- cgit v1.2.3 From e84b2c202771bbd538866207efcb1f7dbab8045b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Dec 2011 22:19:54 +0100 Subject: PM / Domains: Make it possible to assign names to generic PM domains Add a name member pointer to struct generic_pm_domain and use it in diagnostic messages regarding the domain power-off and power-on latencies. Update the ARM shmobile SH7372 code to assign names to the PM domains used by it. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- arch/arm/mach-shmobile/pm-sh7372.c | 16 ++++++++++++---- drivers/base/power/domain.c | 14 ++++++++++++-- include/linux/pm_domain.h | 1 + 3 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index adf1765e69c6..8d9ea8924ed3 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -101,8 +101,8 @@ static int pd_power_down(struct generic_pm_domain *genpd) } if (!sh7372_pd->no_debug) - pr_debug("sh7372 power domain down 0x%08x -> PSTR = 0x%08x\n", - mask, __raw_readl(PSTR)); + pr_debug("%s: Power off, 0x%08x -> PSTR = 0x%08x\n", + genpd->name, mask, __raw_readl(PSTR)); return 0; } @@ -133,8 +133,8 @@ static int __pd_power_up(struct sh7372_pm_domain *sh7372_pd, bool do_resume) ret = -EIO; if (!sh7372_pd->no_debug) - pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n", - mask, __raw_readl(PSTR)); + pr_debug("%s: Power on, 0x%08x -> PSTR = 0x%08x\n", + sh7372_pd->genpd.name, mask, __raw_readl(PSTR)); out: if (ret == 0 && sh7372_pd->resume && do_resume) @@ -233,18 +233,22 @@ void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd, } struct sh7372_pm_domain sh7372_a4lc = { + .genpd.name = "A4LC", .bit_shift = 1, }; struct sh7372_pm_domain sh7372_a4mp = { + .genpd.name = "A4MP", .bit_shift = 2, }; struct sh7372_pm_domain sh7372_d4 = { + .genpd.name = "D4", .bit_shift = 3, }; struct sh7372_pm_domain sh7372_a4r = { + .genpd.name = "A4R", .bit_shift = 5, .gov = &sh7372_always_on_gov, .suspend = sh7372_a4r_suspend, @@ -253,14 +257,17 @@ struct sh7372_pm_domain sh7372_a4r = { }; struct sh7372_pm_domain sh7372_a3rv = { + .genpd.name = "A3RV", .bit_shift = 6, }; struct sh7372_pm_domain sh7372_a3ri = { + .genpd.name = "A3RI", .bit_shift = 8, }; struct sh7372_pm_domain sh7372_a3sp = { + .genpd.name = "A3SP", .bit_shift = 11, .gov = &sh7372_always_on_gov, .no_debug = true, @@ -275,6 +282,7 @@ static void sh7372_a3sp_init(void) } struct sh7372_pm_domain sh7372_a3sg = { + .genpd.name = "A3SG", .bit_shift = 13, }; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5a8d67d51f0e..ad6ba2e04677 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -209,8 +209,13 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) goto err; elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns > genpd->power_on_latency_ns) + if (elapsed_ns > genpd->power_on_latency_ns) { genpd->power_on_latency_ns = elapsed_ns; + if (genpd->name) + pr_warning("%s: Power-on latency exceeded, " + "new value %lld ns\n", genpd->name, + elapsed_ns); + } } genpd_set_active(genpd); @@ -428,8 +433,13 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns > genpd->power_off_latency_ns) + if (elapsed_ns > genpd->power_off_latency_ns) { genpd->power_off_latency_ns = elapsed_ns; + if (genpd->name) + pr_warning("%s: Power-off latency exceeded, " + "new value %lld ns\n", genpd->name, + elapsed_ns); + } } genpd->status = GPD_STATE_POWER_OFF; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index fbb81bc5065a..fb809b904891 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -50,6 +50,7 @@ struct generic_pm_domain { struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; + char *name; unsigned int in_progress; /* Number of devices being suspended now */ atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ -- cgit v1.2.3 From 467de1fc67d1bd2954eaac7019c564f28fa2b6a5 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:17:51 +0100 Subject: PM / Freezer: Remove the "userspace only" constraint from freezer[_do_not]_count() At present, the functions freezer_count() and freezer_do_not_count() impose the restriction that they are effective only for userspace processes. However, now, these functions have found more utility than originally intended by the commit which introduced it: ba96a0c8 (freezer: fix vfork problem). And moreover, even the vfork issue actually does not need the above restriction in these functions. So, modify these functions to make them work even for kernel threads, so that they can be used at other places in the kernel, where the userspace restriction doesn't apply. Suggested-by: Oleg Nesterov Suggested-by: Tejun Heo Acked-by: Tejun Heo Reviewed-by: Oleg Nesterov Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 30f06c220467..7bcfe73d999b 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -67,33 +67,27 @@ static inline bool cgroup_freezing(struct task_struct *task) * appropriately in case the child has exited before the freezing of tasks is * complete. However, we don't want kernel threads to be frozen in unexpected * places, so we allow them to block freeze_processes() instead or to set - * PF_NOFREEZE if needed and PF_FREEZER_SKIP is only set for userland vfork - * parents. Fortunately, in the ____call_usermodehelper() case the parent won't - * really block freeze_processes(), since ____call_usermodehelper() (the child) - * does a little before exec/exit and it can't be frozen before waking up the - * parent. + * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the + * parent won't really block freeze_processes(), since ____call_usermodehelper() + * (the child) does a little before exec/exit and it can't be frozen before + * waking up the parent. */ -/* - * If the current task is a user space one, tell the freezer not to count it as - * freezable. - */ + +/* Tell the freezer not to count the current task as freezable. */ static inline void freezer_do_not_count(void) { - if (current->mm) - current->flags |= PF_FREEZER_SKIP; + current->flags |= PF_FREEZER_SKIP; } /* - * If the current task is a user space one, tell the freezer to count it as - * freezable again and try to freeze it. + * Tell the freezer to count the current task as freezable again and try to + * freeze it. */ static inline void freezer_count(void) { - if (current->mm) { - current->flags &= ~PF_FREEZER_SKIP; - try_to_freeze(); - } + current->flags &= ~PF_FREEZER_SKIP; + try_to_freeze(); } /* -- cgit v1.2.3 From 33e638b9070ba5e8812836e20390da6a6af13900 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:18:12 +0100 Subject: PM / Sleep: Use the freezer_count() functions in [un]lock_system_sleep() APIs Now that freezer_count() and freezer_do_not_count() don't have the restriction that they are effective only when called by userspace processes, use them in lock_system_sleep() and unlock_system_sleep() instead of open-coding their parts. Signed-off-by: Srivatsa S. Bhat Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 1f7fff47cfac..906d62cfc15c 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_VT @@ -380,16 +381,14 @@ static inline void unlock_system_sleep(void) {} static inline void lock_system_sleep(void) { - /* simplified freezer_do_not_count() */ - current->flags |= PF_FREEZER_SKIP; + freezer_do_not_count(); mutex_lock(&pm_mutex); } static inline void unlock_system_sleep(void) { mutex_unlock(&pm_mutex); - /* simplified freezer_count() */ - current->flags &= ~PF_FREEZER_SKIP; + freezer_count(); } #endif -- cgit v1.2.3 From 9b6fc5dc879bc90f765db0e95eefcf123d0d06dd Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:24:38 +0100 Subject: PM / Sleep: Make [un]lock_system_sleep() generic The [un]lock_system_sleep() APIs were originally introduced to mutually exclude memory hotplug and hibernation. Directly using mutex_lock(&pm_mutex) to achieve mutual exclusion with suspend or hibernation code can lead to freezing failures. However, the APIs [un]lock_system_sleep() can be safely used to achieve the same, without causing freezing failures. So, since it would be beneficial to modify all the existing users of mutex_lock(&pm_mutex) (in all parts of the kernel), so that they use these safe APIs intead, make these APIs generic by removing the restriction that they work only when CONFIG_HIBERNATE_CALLBACKS is set. Moreover, that restriction didn't buy us anything anyway. Suggested-by: Tejun Heo Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 906d62cfc15c..95040cc33107 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -332,6 +332,8 @@ static inline bool system_entering_hibernation(void) { return false; } #define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */ #define PM_POST_RESTORE 0x0006 /* Restore failed */ +extern struct mutex pm_mutex; + #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); @@ -352,6 +354,19 @@ extern bool events_check_enabled; extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count); extern bool pm_save_wakeup_count(unsigned int count); + +static inline void lock_system_sleep(void) +{ + freezer_do_not_count(); + mutex_lock(&pm_mutex); +} + +static inline void unlock_system_sleep(void) +{ + mutex_unlock(&pm_mutex); + freezer_count(); +} + #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) @@ -367,30 +382,11 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } -#endif /* !CONFIG_PM_SLEEP */ - -extern struct mutex pm_mutex; -#ifndef CONFIG_HIBERNATE_CALLBACKS static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} -#else - -/* Let some subsystems like memory hotadd exclude hibernation */ - -static inline void lock_system_sleep(void) -{ - freezer_do_not_count(); - mutex_lock(&pm_mutex); -} - -static inline void unlock_system_sleep(void) -{ - mutex_unlock(&pm_mutex); - freezer_count(); -} -#endif +#endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* -- cgit v1.2.3 From 925b44a273aa8c4c23c006c1228aacd538eead09 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 8 Dec 2011 23:27:28 +0100 Subject: PM / Domains: Provide an always on power domain governor Since systems are likely to have power domains that can't be turned off for various reasons at least temporarily while implementing power domain support provide a default governor which will always refuse to power off the domain, saving platforms having to implement their own. Since the code is so tiny don't bother with a Kconfig symbol for it. Signed-off-by: Mark Brown Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 13 +++++++++++++ include/linux/pm_domain.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index da78540e9b40..51527ee92d10 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -141,3 +141,16 @@ struct dev_power_governor simple_qos_governor = { .stop_ok = default_stop_ok, .power_down_ok = default_power_down_ok, }; + +static bool always_on_power_down_ok(struct dev_pm_domain *domain) +{ + return false; +} + +/** + * pm_genpd_gov_always_on - A governor implementing an always-on policy + */ +struct dev_power_governor pm_domain_always_on_gov = { + .power_down_ok = always_on_power_down_ok, + .stop_ok = default_stop_ok, +}; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index fb809b904891..a03a0ad998b8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -140,6 +140,7 @@ extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern bool default_stop_ok(struct device *dev); +extern struct dev_power_governor pm_domain_always_on_gov; #else static inline struct generic_pm_domain *dev_to_genpd(struct device *dev) @@ -193,6 +194,7 @@ static inline bool default_stop_ok(struct device *dev) { return false; } +#define pm_domain_always_on_gov NULL #endif static inline int pm_genpd_remove_callbacks(struct device *dev) -- cgit v1.2.3 From b298d289c79211508f11cb50749b0d1d54eb244a Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Fri, 9 Dec 2011 23:36:36 +0100 Subject: PM / Sleep: Fix freezer failures due to racy usermodehelper_is_disabled() Commit a144c6a (PM: Print a warning if firmware is requested when tasks are frozen) introduced usermodehelper_is_disabled() to warn and exit immediately if firmware is requested when usermodehelpers are disabled. However, it is racy. Consider the following scenario, currently used in drivers/base/firmware_class.c: ... if (usermodehelper_is_disabled()) goto out; /* Do actual work */ ... out: return err; Nothing prevents someone from disabling usermodehelpers just after the check in the 'if' condition, which means that it is quite possible to try doing the "actual work" with usermodehelpers disabled, leading to undesirable consequences. In particular, this race condition in _request_firmware() causes task freezing failures whenever suspend/hibernation is in progress because, it wrongly waits to get the firmware/microcode image from userspace when actually the usermodehelpers are disabled or userspace has been frozen. Some of the example scenarios that cause freezing failures due to this race are those that depend on userspace via request_firmware(), such as x86 microcode module initialization and microcode image reload. Previous discussions about this issue can be found at: http://thread.gmane.org/gmane.linux.kernel/1198291/focus=1200591 This patch adds proper synchronization to fix this issue. It is to be noted that this patchset fixes the freezing failures but doesn't remove the warnings. IOW, it does not attempt to add explicit synchronization to x86 microcode driver to avoid requesting microcode image at inopportune moments. Because, the warnings were introduced to highlight such cases, in the first place. And we need not silence the warnings, since we take care of the *real* problem (freezing failure) and hence, after that, the warnings are pretty harmless anyway. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- drivers/base/firmware_class.c | 4 ++++ include/linux/kmod.h | 2 ++ kernel/kmod.c | 23 ++++++++++++++++++++++- 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 06ed6b4e7df5..d5585da14c8a 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -534,6 +534,8 @@ static int _request_firmware(const struct firmware **firmware_p, return 0; } + read_lock_usermodehelper(); + if (WARN_ON(usermodehelper_is_disabled())) { dev_err(device, "firmware: %s will not be loaded\n", name); retval = -EBUSY; @@ -572,6 +574,8 @@ static int _request_firmware(const struct firmware **firmware_p, fw_destroy_instance(fw_priv); out: + read_unlock_usermodehelper(); + if (retval) { release_firmware(firmware); *firmware_p = NULL; diff --git a/include/linux/kmod.h b/include/linux/kmod.h index b16f65390734..722f477c4ef7 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -117,5 +117,7 @@ extern void usermodehelper_init(void); extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); extern bool usermodehelper_is_disabled(void); +extern void read_lock_usermodehelper(void); +extern void read_unlock_usermodehelper(void); #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index a4bea97c75b6..81b4a27261b2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq; static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; static DEFINE_SPINLOCK(umh_sysctl_lock); +static DECLARE_RWSEM(umhelper_sem); #ifdef CONFIG_MODULES @@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work) * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user * land has been frozen during a system-wide hibernation or suspend operation). + * Should always be manipulated under umhelper_sem acquired for write. */ static int usermodehelper_disabled = 1; @@ -293,6 +296,18 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) +void read_lock_usermodehelper(void) +{ + down_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_lock_usermodehelper); + +void read_unlock_usermodehelper(void) +{ + up_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); + /** * usermodehelper_disable - prevent new helpers from being started */ @@ -300,8 +315,10 @@ int usermodehelper_disable(void) { long retval; + down_write(&umhelper_sem); usermodehelper_disabled = 1; - smp_mb(); + up_write(&umhelper_sem); + /* * From now on call_usermodehelper_exec() won't start any new * helpers, so it is sufficient if running_helpers turns out to @@ -314,7 +331,9 @@ int usermodehelper_disable(void) if (retval) return 0; + down_write(&umhelper_sem); usermodehelper_disabled = 0; + up_write(&umhelper_sem); return -EAGAIN; } @@ -323,7 +342,9 @@ int usermodehelper_disable(void) */ void usermodehelper_enable(void) { + down_write(&umhelper_sem); usermodehelper_disabled = 0; + up_write(&umhelper_sem); } /** -- cgit v1.2.3 From 9b39e73d0c2b265a7f8748b0e9a9f09be84079a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:24 +0100 Subject: PM / Sleep: Remove forward-only callbacks from platform bus type The forward-only PM callbacks provided by the platform bus type are not necessary any more, because the PM core executes driver callbacks when the corresponding subsystem callbacks are not present, so drop them. Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 115 ---------------------------------------- include/linux/platform_device.h | 30 +---------- 2 files changed, 1 insertion(+), 144 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 7a24895543e7..7d912d5675d8 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -700,25 +700,6 @@ static int platform_legacy_resume(struct device *dev) return ret; } -int platform_pm_prepare(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (drv && drv->pm && drv->pm->prepare) - ret = drv->pm->prepare(dev); - - return ret; -} - -void platform_pm_complete(struct device *dev) -{ - struct device_driver *drv = dev->driver; - - if (drv && drv->pm && drv->pm->complete) - drv->pm->complete(dev); -} - #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND @@ -741,22 +722,6 @@ int platform_pm_suspend(struct device *dev) return ret; } -int platform_pm_suspend_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend_noirq) - ret = drv->pm->suspend_noirq(dev); - } - - return ret; -} - int platform_pm_resume(struct device *dev) { struct device_driver *drv = dev->driver; @@ -775,22 +740,6 @@ int platform_pm_resume(struct device *dev) return ret; } -int platform_pm_resume_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume_noirq) - ret = drv->pm->resume_noirq(dev); - } - - return ret; -} - #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS @@ -813,22 +762,6 @@ int platform_pm_freeze(struct device *dev) return ret; } -int platform_pm_freeze_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze_noirq) - ret = drv->pm->freeze_noirq(dev); - } - - return ret; -} - int platform_pm_thaw(struct device *dev) { struct device_driver *drv = dev->driver; @@ -847,22 +780,6 @@ int platform_pm_thaw(struct device *dev) return ret; } -int platform_pm_thaw_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw_noirq) - ret = drv->pm->thaw_noirq(dev); - } - - return ret; -} - int platform_pm_poweroff(struct device *dev) { struct device_driver *drv = dev->driver; @@ -881,22 +798,6 @@ int platform_pm_poweroff(struct device *dev) return ret; } -int platform_pm_poweroff_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff_noirq) - ret = drv->pm->poweroff_noirq(dev); - } - - return ret; -} - int platform_pm_restore(struct device *dev) { struct device_driver *drv = dev->driver; @@ -915,22 +816,6 @@ int platform_pm_restore(struct device *dev) return ret; } -int platform_pm_restore_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore_noirq) - ret = drv->pm->restore_noirq(dev); - } - - return ret; -} - #endif /* CONFIG_HIBERNATE_CALLBACKS */ static const struct dev_pm_ops platform_dev_pm_ops = { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 2a23f7d1a825..b5267c951161 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -264,62 +264,34 @@ static inline char *early_platform_driver_setup_func(void) \ } #endif /* MODULE */ -#ifdef CONFIG_PM_SLEEP -extern int platform_pm_prepare(struct device *dev); -extern void platform_pm_complete(struct device *dev); -#else -#define platform_pm_prepare NULL -#define platform_pm_complete NULL -#endif - #ifdef CONFIG_SUSPEND extern int platform_pm_suspend(struct device *dev); -extern int platform_pm_suspend_noirq(struct device *dev); extern int platform_pm_resume(struct device *dev); -extern int platform_pm_resume_noirq(struct device *dev); #else #define platform_pm_suspend NULL #define platform_pm_resume NULL -#define platform_pm_suspend_noirq NULL -#define platform_pm_resume_noirq NULL #endif #ifdef CONFIG_HIBERNATE_CALLBACKS extern int platform_pm_freeze(struct device *dev); -extern int platform_pm_freeze_noirq(struct device *dev); extern int platform_pm_thaw(struct device *dev); -extern int platform_pm_thaw_noirq(struct device *dev); extern int platform_pm_poweroff(struct device *dev); -extern int platform_pm_poweroff_noirq(struct device *dev); extern int platform_pm_restore(struct device *dev); -extern int platform_pm_restore_noirq(struct device *dev); #else #define platform_pm_freeze NULL #define platform_pm_thaw NULL #define platform_pm_poweroff NULL #define platform_pm_restore NULL -#define platform_pm_freeze_noirq NULL -#define platform_pm_thaw_noirq NULL -#define platform_pm_poweroff_noirq NULL -#define platform_pm_restore_noirq NULL #endif #ifdef CONFIG_PM_SLEEP #define USE_PLATFORM_PM_SLEEP_OPS \ - .prepare = platform_pm_prepare, \ - .complete = platform_pm_complete, \ .suspend = platform_pm_suspend, \ .resume = platform_pm_resume, \ .freeze = platform_pm_freeze, \ .thaw = platform_pm_thaw, \ .poweroff = platform_pm_poweroff, \ - .restore = platform_pm_restore, \ - .suspend_noirq = platform_pm_suspend_noirq, \ - .resume_noirq = platform_pm_resume_noirq, \ - .freeze_noirq = platform_pm_freeze_noirq, \ - .thaw_noirq = platform_pm_thaw_noirq, \ - .poweroff_noirq = platform_pm_poweroff_noirq, \ - .restore_noirq = platform_pm_restore_noirq, + .restore = platform_pm_restore, #else #define USE_PLATFORM_PM_SLEEP_OPS #endif -- cgit v1.2.3 From 90363ddf0a1a4dccfbb8d0c10b8f488bc7fa69f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:42 +0100 Subject: PM: Drop generic_subsys_pm_ops Since the PM core is now going to execute driver callbacks directly if the corresponding subsystem callbacks are not present, forward-only subsystem callbacks (i.e. such that only execute the corresponding driver callbacks) are not necessary any more. Thus it is possible to remove generic_subsys_pm_ops, because the only callback in there that is not forward-only, .runtime_idle, is not really used by the only user of generic_subsys_pm_ops, which is vio_bus_type. However, the generic callback routines themselves cannot be removed from generic_ops.c, because they are used individually by a number of subsystems. Signed-off-by: Rafael J. Wysocki --- arch/powerpc/kernel/vio.c | 1 - drivers/base/power/generic_ops.c | 25 ------------------------- include/linux/pm.h | 13 ------------- 3 files changed, 39 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index f65af61996bd..8b086299ba25 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1406,7 +1406,6 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, - .pm = GENERIC_SUBSYS_PM_OPS, }; /** diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 5a5b154bc1e9..10bdd793f0bd 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -276,28 +276,3 @@ void pm_generic_complete(struct device *dev) pm_runtime_idle(dev); } #endif /* CONFIG_PM_SLEEP */ - -struct dev_pm_ops generic_subsys_pm_ops = { -#ifdef CONFIG_PM_SLEEP - .prepare = pm_generic_prepare, - .suspend = pm_generic_suspend, - .suspend_noirq = pm_generic_suspend_noirq, - .resume = pm_generic_resume, - .resume_noirq = pm_generic_resume_noirq, - .freeze = pm_generic_freeze, - .freeze_noirq = pm_generic_freeze_noirq, - .thaw = pm_generic_thaw, - .thaw_noirq = pm_generic_thaw_noirq, - .poweroff = pm_generic_poweroff, - .poweroff_noirq = pm_generic_poweroff_noirq, - .restore = pm_generic_restore, - .restore_noirq = pm_generic_restore_noirq, - .complete = pm_generic_complete, -#endif -#ifdef CONFIG_PM_RUNTIME - .runtime_suspend = pm_generic_runtime_suspend, - .runtime_resume = pm_generic_runtime_resume, - .runtime_idle = pm_generic_runtime_idle, -#endif -}; -EXPORT_SYMBOL_GPL(generic_subsys_pm_ops); diff --git a/include/linux/pm.h b/include/linux/pm.h index 3f3ed83a9aa5..21e04dd72a84 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -300,19 +300,6 @@ const struct dev_pm_ops name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } -/* - * Use this for subsystems (bus types, device types, device classes) that don't - * need any special suspend/resume handling in addition to invoking the PM - * callbacks provided by device drivers supporting both the system sleep PM and - * runtime PM, make the pm member point to generic_subsys_pm_ops. - */ -#ifdef CONFIG_PM -extern struct dev_pm_ops generic_subsys_pm_ops; -#define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops) -#else -#define GENERIC_SUBSYS_PM_OPS NULL -#endif - /** * PM_EVENT_ messages * -- cgit v1.2.3 From 0f966d74cf77a9140a025464a287e1d2fee8a1fc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:23:30 +0100 Subject: PM / shmobile: Don't include SH7372's INTCS in syscore suspend/resume Since the SH7372's INTCS in included into syscore suspend/resume, which causes the chip to be accessed when PM domains have been turned off during system suspend, the A4R domain containing the INTCS has to stay on during system sleep, which is suboptimal from the power consumption point of view. For this reason, add a new INTC flag, skip_syscore_suspend, to mark the INTCS for intc_suspend() and intc_resume(), so that they don't touch it. This allows the A4R domain to be turned off during system suspend and the INTCS state is resrored during system resume by the A4R's "power on" code. Suggested-by: Magnus Damm Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- arch/arm/mach-shmobile/intc-sh7372.c | 1 + drivers/sh/intc/core.c | 8 ++++++++ drivers/sh/intc/internals.h | 1 + include/linux/sh_intc.h | 1 + 4 files changed, 11 insertions(+) (limited to 'include') diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c index d087b31b5d12..89afcaba99a1 100644 --- a/arch/arm/mach-shmobile/intc-sh7372.c +++ b/arch/arm/mach-shmobile/intc-sh7372.c @@ -535,6 +535,7 @@ static struct resource intcs_resources[] __initdata = { static struct intc_desc intcs_desc __initdata = { .name = "sh7372-intcs", .force_enable = ENABLED_INTCS, + .skip_syscore_suspend = true, .resource = intcs_resources, .num_resources = ARRAY_SIZE(intcs_resources), .hw = INTC_HW_DESC(intcs_vectors, intcs_groups, intcs_mask_registers, diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index 8b7a141ff35e..be5a025eeca3 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -354,6 +354,8 @@ int __init register_intc_controller(struct intc_desc *desc) if (desc->force_enable) intc_enable_disable_enum(desc, d, desc->force_enable, 1); + d->skip_suspend = desc->skip_syscore_suspend; + nr_intc_controllers++; return 0; @@ -386,6 +388,9 @@ static int intc_suspend(void) list_for_each_entry(d, &intc_list, list) { int irq; + if (d->skip_suspend) + continue; + /* enable wakeup irqs belonging to this intc controller */ for_each_active_irq(irq) { struct irq_data *data; @@ -409,6 +414,9 @@ static void intc_resume(void) list_for_each_entry(d, &intc_list, list) { int irq; + if (d->skip_suspend) + continue; + for_each_active_irq(irq) { struct irq_data *data; struct irq_chip *chip; diff --git a/drivers/sh/intc/internals.h b/drivers/sh/intc/internals.h index 5b934851efa8..b3fe1cf25a28 100644 --- a/drivers/sh/intc/internals.h +++ b/drivers/sh/intc/internals.h @@ -67,6 +67,7 @@ struct intc_desc_int { struct intc_window *window; unsigned int nr_windows; struct irq_chip chip; + bool skip_suspend; }; diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index 5812fefbcedf..b160645f5599 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -95,6 +95,7 @@ struct intc_desc { unsigned int num_resources; intc_enum force_enable; intc_enum force_disable; + bool skip_syscore_suspend; struct intc_hw_desc hw; }; -- cgit v1.2.3 From 40a5f8be2f482783de0f1f0fe856660e489734a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:23:52 +0100 Subject: PM / QoS: Introduce dev_pm_qos_add_ancestor_request() Some devices, like the I2C controller on SH7372, are not necessary for providing power to their children or forwarding wakeup signals (and generally interrupts) from them. They are only needed by their children when there's some data to transfer, so they may be suspended for the majority of time and resumed on demand, when the children have data to send or receive. For this purpose, however, their power.ignore_children flags have to be set, or the PM core wouldn't allow them to be suspended while their children were active. Unfortunately, in some situations it may take too much time to resume such devices so that they can assist their children in transferring data. For example, if such a device belongs to a PM domain which goes to the "power off" state when that device is suspended, it may take too much time to restore power to the domain in response to the request from one of the device's children. In that case, if the parent's resume time is critical, the domain should stay in the "power on" state, although it still may be desirable to power manage the parent itself (e.g. by manipulating its clock). In general, device PM QoS may be used to address this problem. Namely, if the device's children added PM QoS latency constraints for it, they would be able to prevent it from being put into an overly deep low-power state. However, in some cases the devices needing to be serviced are not the immediate children of a "children-ignoring" device, but its grandchildren or even less direct descendants. In those cases, the entity wanting to add a PM QoS request for a given device's ancestor that ignores its children will have to find it in the first place, so introduce a new helper function that may be used to achieve that. This function, dev_pm_qos_add_ancestor_request(), will search for the first ancestor of the given device whose power.ignore_children flag is set and will add a device PM QoS latency request for that ancestor on behalf of the caller. The request added this way may be removed with the help of dev_pm_qos_remove_request() in the future, like any other device PM QoS latency request. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 25 +++++++++++++++++++++++++ include/linux/pm_qos.h | 5 +++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 86de6c50fc41..edf7687615e8 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -412,3 +412,28 @@ int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier) return blocking_notifier_chain_unregister(&dev_pm_notifiers, notifier); } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier); + +/** + * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor. + * @dev: Device whose ancestor to add the request for. + * @req: Pointer to the preallocated handle. + * @value: Constraint latency value. + */ +int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value) +{ + struct device *ancestor = dev->parent; + int error = -ENODEV; + + while (ancestor && !ancestor->power.ignore_children) + ancestor = ancestor->parent; + + if (ancestor) + error = dev_pm_qos_add_request(ancestor, req, value); + + if (error) + req->dev = NULL; + + return error; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 83b0ea302a80..fe247b33652d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -91,6 +91,8 @@ int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); +int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value); #else static inline int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, @@ -150,6 +152,9 @@ static inline void dev_pm_qos_constraints_destroy(struct device *dev) { dev->power.power_state = PMSG_INVALID; } +static inline int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value) + { return 0; } #endif #endif -- cgit v1.2.3 From b3b73ec0d7fe5bf8f950232aa58dfa0416a62372 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Dec 2011 00:29:55 +0100 Subject: PM / Freezer: fix return value of freezable_schedule_timeout_killable() ...it should return the return code from schedule_timeout_killable(), not the one from freezer_count(). All of the current callers ignore the return code so the bug is harmless but it's worth fixing. Signed-off-by: Jeff Layton Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 7bcfe73d999b..0ab54e16a91f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -116,9 +116,11 @@ static inline int freezer_should_skip(struct task_struct *p) /* Like schedule_timeout_killable(), but should not block the freezer. */ #define freezable_schedule_timeout_killable(timeout) \ ({ \ + long __retval; \ freezer_do_not_count(); \ - schedule_timeout_killable(timeout); \ + __retval = schedule_timeout_killable(timeout); \ freezer_count(); \ + __retval; \ }) /* -- cgit v1.2.3