From de5bdff7a72acc281219be2b8edeeca1fd81c542 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 16 Feb 2012 14:52:21 +0900 Subject: sched: Make initial SCHED_RR timeslice DEF_TIMESLICE Currently the initial SCHED_RR timeslice of init_task is HZ, which means 1s, and is not the same as the default SCHED_RR timeslice DEF_TIMESLICE. Change that initial timeslice to the DEF_TIMESLICE. Signed-off-by: Hiroshi Shimamoto [ s/DEF_TIMESLICE/RR_TIMESLICE/g ] Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4F3C9995.3010800@ct.jp.nec.com Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched/rt.c') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f42ae7fb5ec5..f70206c2c802 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1972,7 +1972,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) if (--p->rt.time_slice) return; - p->rt.time_slice = DEF_TIMESLICE; + p->rt.time_slice = RR_TIMESLICE; /* * Requeue to the end of queue if we are not the only element @@ -2000,7 +2000,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) * Time slice is 0 for SCHED_FIFO tasks */ if (task->policy == SCHED_RR) - return DEF_TIMESLICE; + return RR_TIMESLICE; else return 0; } -- cgit v1.2.3 From 42c62a589f1ccbf38a02cb732231f9c2fccc5ab0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Oct 2011 22:03:48 +0200 Subject: sched/rt: Keep period timer ticking when rt throttling is active When a runqueue is throttled we cannot disable the period timer because that timer is the only way to undo the throttling. We got stale throttling entries when a rq was throttled and then the global sysctl was disabled, which stopped the timer. 
Signed-off-by: Peter Zijlstra [ Added changelog ] Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-nuj34q52p6ro7szapuz84i0v@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/sched/rt.c') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f70206c2c802..6d1eb0be1870 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -778,12 +778,9 @@ static inline int balance_runtime(struct rt_rq *rt_rq) static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { - int i, idle = 1; + int i, idle = 1, throttled = 0; const struct cpumask *span; - if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) - return 1; - span = sched_rt_period_mask(); for_each_cpu(i, span) { int enqueue = 0; @@ -818,12 +815,17 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (!rt_rq_throttled(rt_rq)) enqueue = 1; } + if (rt_rq->rt_throttled) + throttled = 1; if (enqueue) sched_rt_rq_enqueue(rt_rq); raw_spin_unlock(&rq->lock); } + if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) + return 1; + return idle; } @@ -884,7 +886,8 @@ static void update_curr_rt(struct rq *rq) if (unlikely((s64)delta_exec < 0)) delta_exec = 0; - schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); + schedstat_set(curr->se.statistics.exec_max, + max(curr->se.statistics.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; account_group_exec_runtime(curr, delta_exec); -- cgit v1.2.3 From 7abc63b1bd412f7655b62ef3e35c3c11c5134636 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 18 Oct 2011 22:03:48 +0200 Subject: sched/rt: Do not throttle when PI boosting When a runqueue has rt_runtime_us = 0 then the only way it can accumulate rt_time is via PI boosting. That causes the runqueue to be throttled and replenishing does not change anything due to rt_runtime_us = 0. 
So avoid that situation by clearing rt_time and skipping the throttling altogether. Signed-off-by: Peter Zijlstra [ Changelog ] Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-7x70cypsotjb4jvcor3edctk@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'kernel/sched/rt.c') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 6d1eb0be1870..7f7e7cdcb472 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -857,8 +857,24 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; if (rt_rq->rt_time > runtime) { - rt_rq->rt_throttled = 1; - printk_once(KERN_WARNING "sched: RT throttling activated\n"); + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + + /* + * Don't actually throttle groups that have no runtime assigned + * but accrue some time due to boosting. + */ + if (likely(rt_b->rt_runtime)) { + rt_rq->rt_throttled = 1; + printk_once(KERN_WARNING "sched: RT throttling activated\n"); + } else { + /* + * In case we did anyway, make it go away, + * replenishment is a joke, since it will replenish us + * with exactly 0 ns. + */ + rt_rq->rt_time = 0; + } + if (rt_rq_throttled(rt_rq)) { sched_rt_rq_dequeue(rt_rq); return 1; -- cgit v1.2.3 From 3ccf3e8306156a28213adc720aba807e9a901ad5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Feb 2012 10:47:00 +0100 Subject: printk/sched: Introduce special printk_sched() for those awkward moments There's a few awkward printk()s inside of scheduler guts that people prefer to keep but really are rather deadlock prone. Fudge around it by storing the text in a per-cpu buffer and poll it using the existing printk_tick() handler. This will drop output when it's more frequent than once a tick, however only the affinity thing could possibly go that fast and for that just one should suffice to notify the admin he's done something silly.. 
Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-wua3lmkt3dg8nfts66o6brne@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/printk.h | 10 ++++++++++ kernel/printk.c | 40 +++++++++++++++++++++++++++++++++++++--- kernel/sched/core.c | 2 +- kernel/sched/rt.c | 8 +++++++- 4 files changed, 55 insertions(+), 5 deletions(-) (limited to 'kernel/sched/rt.c') diff --git a/include/linux/printk.h b/include/linux/printk.h index f0e22f75143f..1f77a4174ee0 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -100,6 +100,11 @@ int vprintk(const char *fmt, va_list args); asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); +/* + * Special printk facility for scheduler use only, _DO_NOT_USE_ ! + */ +__printf(1, 2) __cold int printk_sched(const char *fmt, ...); + /* * Please don't use printk_ratelimit(), because it shares ratelimiting state * with all other unrelated printk_ratelimit() callsites. Instead use @@ -127,6 +132,11 @@ int printk(const char *s, ...) { return 0; } +static inline __printf(1, 2) __cold +int printk_sched(const char *s, ...) +{ + return 0; +} static inline int printk_ratelimit(void) { return 0; diff --git a/kernel/printk.c b/kernel/printk.c index 13c0a1143f49..7ca7ba591e21 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1208,13 +1208,47 @@ int is_console_locked(void) return console_locked; } +/* + * Delayed printk facility, for scheduler-internal messages: + */ +#define PRINTK_BUF_SIZE 512 + +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_SCHED 0x02 + static DEFINE_PER_CPU(int, printk_pending); +static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); + +int printk_sched(const char *fmt, ...) 
+{ + unsigned long flags; + va_list args; + char *buf; + int r; + + local_irq_save(flags); + buf = __get_cpu_var(printk_sched_buf); + + va_start(args, fmt); + r = vsnprintf(buf, PRINTK_BUF_SIZE, fmt, args); + va_end(args); + + __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED); + local_irq_restore(flags); + + return r; +} void printk_tick(void) { if (__this_cpu_read(printk_pending)) { - __this_cpu_write(printk_pending, 0); - wake_up_interruptible(&log_wait); + int pending = __this_cpu_xchg(printk_pending, 0); + if (pending & PRINTK_PENDING_SCHED) { + char *buf = __get_cpu_var(printk_sched_buf); + printk(KERN_WARNING "[sched_delayed] %s", buf); + } + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); } } @@ -1228,7 +1262,7 @@ int printk_needs_cpu(int cpu) void wake_up_klogd(void) { if (waitqueue_active(&log_wait)) - this_cpu_write(printk_pending, 1); + this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); } /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b1ccce819ce2..8781cec7c3e6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1284,7 +1284,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) * leave kernel. */ if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", + printk_sched("process %d (%s) no longer affine to cpu%d\n", task_pid_nr(p), p->comm, cpu); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7f7e7cdcb472..b60dad720173 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -864,8 +864,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) * but accrue some time due to boosting. */ if (likely(rt_b->rt_runtime)) { + static bool once = false; + rt_rq->rt_throttled = 1; - printk_once(KERN_WARNING "sched: RT throttling activated\n"); + + if (!once) { + once = true; + printk_sched("sched: RT throttling activated\n"); + } } else { /* * In case we did anyway, make it go away, -- cgit v1.2.3