Merge branch 'timers/core-v9' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz enhancements from Frederic Weisbecker: "Currently in nohz full configs, the tick dependency is checked asynchronously by nohz code from interrupt and context switch for each concerned subsystem with a set of function provided by these. Such functions are made of many conditions and details that can be heavyweight as they are called on fastpath: sched_can_stop_tick(), posix_cpu_timer_can_stop_tick(), perf_event_can_stop_tick()... Thomas suggested a few months ago to make that tick dependency check synchronous. Instead of checking subsystems details from each interrupt to guess if the tick can be stopped, every subsystem that may have a tick dependency should set itself a flag specifying the state of that dependency. This way we can verify if we can stop the tick with a single lightweight mask check on fast path. This conversion from a pull to a push model to implement tick dependency is the core feature of this patchset that is split into: * Nohz wide kick simplification * Improve nohz tracing * Introduce tick dependency mask * Migrate scheduler, posix timers, perf events and sched clock tick dependencies to the tick dependency mask." Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Ingo Molnar <mingo@kernel.org> 2016-03-08 15:17:54 +0300
committer: Ingo Molnar <mingo@kernel.org> 2016-03-08 15:17:54 +0300
commit: 1f25184656a00a59e3a953189070d42a749f6aee (patch)
tree: c69dbf4f09a6aaaa54f8c962e3a029eb3715d0c9 /kernel
parent: e2857b8f11a289ed2b61d18d0665e05c1053c446 (diff)
parent: 4f49b90abb4aca6fe677c95fc352fd0674d489bd (diff)
download: linux-1f25184656a00a59e3a953189070d42a749f6aee.tar.xz
8 files changed, 271 insertions, 140 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 614614821f00..effe8d652c1d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3112,17 +3112,6 @@ done:
 	return rotate;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-	if (atomic_read(&nr_freq_events) ||
-	    __this_cpu_read(perf_throttled_count))
-		return false;
-	else
-		return true;
-}
-#endif
-
 void perf_event_task_tick(void)
 {
 	struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
 
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+	tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 
 	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
 		atomic_dec(&per_cpu(perf_cgroup_events, cpu));
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static DEFINE_SPINLOCK(nr_freq_lock);
+#endif
+
+static void unaccount_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	spin_lock(&nr_freq_lock);
+	if (atomic_dec_and_test(&nr_freq_events))
+		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void unaccount_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		unaccount_freq_event_nohz();
+	else
+		atomic_dec(&nr_freq_events);
+}
+
 static void unaccount_event(struct perf_event *event)
 {
 	bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
-		atomic_dec(&nr_freq_events);
+		unaccount_freq_event();
 	if (event->attr.context_switch) {
 		dec = true;
 		atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
 		if (unlikely(throttle
 			     && hwc->interrupts >= max_samples_per_tick)) {
 			__this_cpu_inc(perf_throttled_count);
+			tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
-			tick_nohz_full_kick();
 			ret = 1;
 		}
 	}
@@ -7816,6 +7828,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
 		atomic_inc(&per_cpu(perf_cgroup_events, cpu));
 }
 
+/* Freq events need the tick to stay alive (see perf_event_task_tick). */
+static void account_freq_event_nohz(void)
+{
+#ifdef CONFIG_NO_HZ_FULL
+	/* Lock so we don't race with concurrent unaccount */
+	spin_lock(&nr_freq_lock);
+	if (atomic_inc_return(&nr_freq_events) == 1)
+		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
+	spin_unlock(&nr_freq_lock);
+#endif
+}
+
+static void account_freq_event(void)
+{
+	if (tick_nohz_full_enabled())
+		account_freq_event_nohz();
+	else
+		atomic_inc(&nr_freq_events);
+}
+
+
 static void account_event(struct perf_event *event)
 {
 	bool inc = false;
@@ -7831,10 +7864,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
-	if (event->attr.freq) {
-		if (atomic_inc_return(&nr_freq_events) == 1)
-			tick_nohz_full_kick_all();
-	}
+	if (event->attr.freq)
+		account_freq_event();
 	if (event->attr.context_switch) {
 		atomic_inc(&nr_switch_events);
 		inc = true;
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index bc54e84675da..fedb967a9841 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -61,6 +61,7 @@
 #include <linux/static_key.h>
 #include <linux/workqueue.h>
 #include <linux/compiler.h>
+#include <linux/tick.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
 {
 	if (!sched_clock_stable())
 		static_key_slow_inc(&__sched_clock_stable);
+
+	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 void set_sched_clock_stable(void)
@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
 	/* XXX worry about clock continuity */
 	if (sched_clock_stable())
 		static_key_slow_dec(&__sched_clock_stable);
+
+	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
 static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9503d590e5ef..1fad82364ffe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -453,20 +453,6 @@ static inline void init_hrtick(void)
 }
 #endif	/* CONFIG_SCHED_HRTICK */
 
-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
- 	for (;;) {							\
- 		__old = cmpxchg((ptr), __val, __val | (val));		\
- 		if (__old == __val)					\
- 			break;						\
- 		__val = __old;						\
- 	}								\
- 	__old;								\
-})
-
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -715,31 +701,36 @@ static inline bool got_nohz_idle_kick(void)
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
 {
+	int fifo_nr_running;
+
+	/* Deadline tasks, even if single, need the tick */
+	if (rq->dl.dl_nr_running)
+		return false;
+
 	/*
-	 * FIFO realtime policy runs the highest priority task. Other runnable
-	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 * FIFO realtime policy runs the highest priority task (after DEADLINE).
+	 * Other runnable tasks are of a lower priority. The scheduler tick
+	 * isn't needed.
 	 */
-	if (current->policy == SCHED_FIFO)
+	fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+	if (fifo_nr_running)
 		return true;
 
 	/*
 	 * Round-robin realtime tasks time slice with other tasks at the same
-	 * realtime priority. Is this task the only one at this priority?
+	 * realtime priority.
 	 */
-	if (current->policy == SCHED_RR) {
-		struct sched_rt_entity *rt_se = &current->rt;
-
-		return list_is_singular(&rt_se->run_list);
+	if (rq->rt.rr_nr_running) {
+		if (rq->rt.rr_nr_running == 1)
+			return true;
+		else
+			return false;
 	}
 
-	/*
-	 * More than one running task need preemption.
-	 * nr_running update is assumed to be visible
-	 * after IPI is sent from wakers.
-	 */
-	if (this_rq()->nr_running > 1)
+	/* Normal multitasking need periodic preemption checks */
+	if (rq->cfs.nr_running > 1)
 		return false;
 
 	return true;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8ec86abe0ea1..3f1fcffbb18f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1142,12 +1142,27 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
 }
 
 static inline
+unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct task_struct *tsk;
+
+	if (group_rq)
+		return group_rq->rr_nr_running;
+
+	tsk = rt_task_of(rt_se);
+
+	return (tsk->policy == SCHED_RR) ? 1 : 0;
+}
+
+static inline
 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
 	int prio = rt_se_prio(rt_se);
 
 	WARN_ON(!rt_prio(prio));
 	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
 
 	inc_rt_prio(rt_rq, prio);
 	inc_rt_migration(rt_se, rt_rq);
@@ -1160,6 +1175,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
+	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
 
 	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
 	dec_rt_migration(rt_se, rt_rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 10f16374df7f..4f0bca770108 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned int rt_nr_running;
+	unsigned int rr_nr_running;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	struct {
 		int curr; /* highest queued rt task prio */
@@ -1278,6 +1279,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_entity_runnable_average(struct sched_entity *se);
 
+#ifdef CONFIG_NO_HZ_FULL
+extern bool sched_can_stop_tick(struct rq *rq);
+
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, lets send the target an IPI to kick it out of
+ * nohz mode if necessary.
+ */
+static inline void sched_update_tick_dependency(struct rq *rq)
+{
+	int cpu;
+
+	if (!tick_nohz_full_enabled())
+		return;
+
+	cpu = cpu_of(rq);
+
+	if (!tick_nohz_full_cpu(cpu))
+		return;
+
+	if (sched_can_stop_tick(rq))
+		tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
+	else
+		tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#else
+static inline void sched_update_tick_dependency(struct rq *rq) { }
+#endif
+
 static inline void add_nr_running(struct rq *rq, unsigned count)
 {
 	unsigned prev_nr = rq->nr_running;
@@ -1289,26 +1319,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 		if (!rq->rd->overload)
 			rq->rd->overload = true;
 #endif
-
-#ifdef CONFIG_NO_HZ_FULL
-		if (tick_nohz_full_cpu(rq->cpu)) {
-			/*
-			 * Tick is needed if more than one task runs on a CPU.
-			 * Send the target an IPI to kick it out of nohz mode.
-			 *
-			 * We assume that IPI implies full memory barrier and the
-			 * new value of rq->nr_running is visible on reception
-			 * from the target.
-			 */
-			tick_nohz_full_kick_cpu(rq->cpu);
-		}
-#endif
 	}
+
+	sched_update_tick_dependency(rq);
 }
 
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
+	/* Check if we still need preemption */
+	sched_update_tick_dependency(rq);
 }
 
 static inline void rq_last_tick_reset(struct rq *rq)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index f5e86d282d52..1cafba860b08 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 	return err;
 }
 
-
 /*
  * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
  * This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
 				cputime_expires->sched_exp = exp;
 			break;
 		}
+		if (CPUCLOCK_PERTHREAD(timer->it_clock))
+			tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+		else
+			tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
 	}
 }
 
@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 	return 0;
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-	tick_nohz_full_kick_all();
-}
-
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
-	if (context_tracking_is_enabled())
-		schedule_work(&nohz_kick_work);
-}
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-	if (!task_cputime_zero(&tsk->cputime_expires))
-		return false;
-
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		return false;
-
-	return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
-
 /*
  * Guts of sys_timer_settime for CPU timers.
  * This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 		sample_to_timespec(timer->it_clock,
 				   old_incr, &old->it_interval);
 	}
-	if (!ret)
-		posix_cpu_timer_kick_nohz();
+
 	return ret;
 }
 
@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
 			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
 		}
 	}
+	if (task_cputime_zero(tsk_expires))
+		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
 
 	/* Turn off cputimer->running. This is done without locking. */
 	WRITE_ONCE(cputimer->running, false);
+	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static u32 onecputick;
@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
 	arm_timer(timer);
 	unlock_task_sighand(p, &flags);
 
-	/* Kick full dynticks CPUs in case they need to tick on the new timer */
-	posix_cpu_timer_kick_nohz();
 out:
 	timer->it_overrun_last = timer->it_overrun;
 	timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 		}
 
 		if (!*newval)
-			goto out;
+			return;
 		*newval += now;
 	}
 
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			tsk->signal->cputime_expires.virt_exp = *newval;
 		break;
 	}
-out:
-	posix_cpu_timer_kick_nohz();
+
+	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0b17424349eb..969e6704c3c9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
-#include <linux/perf_event.h>
 #include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 cpumask_var_t tick_nohz_full_mask;
 cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
+static unsigned long tick_dep_mask;
 
-static bool can_stop_full_tick(void)
+static void trace_tick_dependency(unsigned long dep)
+{
+	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_SCHED) {
+		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
+		return;
+	}
+
+	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (!sched_can_stop_tick()) {
-		trace_tick_stop(0, "more than 1 task in runqueue\n");
+	if (tick_dep_mask) {
+		trace_tick_dependency(tick_dep_mask);
 		return false;
 	}
 
-	if (!posix_cpu_timers_can_stop_tick(current)) {
-		trace_tick_stop(0, "posix timers running\n");
+	if (ts->tick_dep_mask) {
+		trace_tick_dependency(ts->tick_dep_mask);
 		return false;
 	}
 
-	if (!perf_event_can_stop_tick()) {
-		trace_tick_stop(0, "perf events running\n");
+	if (current->tick_dep_mask) {
+		trace_tick_dependency(current->tick_dep_mask);
 		return false;
 	}
 
-	/* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-	/*
-	 * TODO: kick full dynticks CPUs when
-	 * sched_clock_stable is set.
-	 */
-	if (!sched_clock_stable()) {
-		trace_tick_stop(0, "unstable sched clock\n");
-		/*
-		 * Don't allow the user to think they can get
-		 * full NO_HZ with this machine.
-		 */
-		WARN_ONCE(tick_nohz_full_running,
-			  "NO_HZ FULL will not work with unstable sched clock");
+	if (current->signal->tick_dep_mask) {
+		trace_tick_dependency(current->signal->tick_dep_mask);
 		return false;
 	}
-#endif
 
 	return true;
 }
 
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
 {
 	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-	.func = nohz_full_kick_work_func,
+	.func = nohz_full_kick_func,
 };
 
 /*
@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
  * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
  * is NMI safe.
  */
-void tick_nohz_full_kick(void)
+static void tick_nohz_full_kick(void)
 {
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		return;
@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
 	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
-static void nohz_full_kick_ipi(void *info)
-{
-	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}
-
 /*
  * Kick all full dynticks CPUs in order to force these to re-evaluate
  * their dependency on the tick and restart it if necessary.
  */
-void tick_nohz_full_kick_all(void)
+static void tick_nohz_full_kick_all(void)
 {
+	int cpu;
+
 	if (!tick_nohz_full_running)
 		return;
 
 	preempt_disable();
-	smp_call_function_many(tick_nohz_full_mask,
-			       nohz_full_kick_ipi, NULL, false);
-	tick_nohz_full_kick();
+	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
+		tick_nohz_full_kick_cpu(cpu);
 	preempt_enable();
 }
 
+static void tick_nohz_dep_set_all(unsigned long *dep,
+				  enum tick_dep_bits bit)
+{
+	unsigned long prev;
+
+	prev = fetch_or(dep, BIT_MASK(bit));
+	if (!prev)
+		tick_nohz_full_kick_all();
+}
+
+/*
+ * Set a global tick dependency. Used by perf events that rely on freq and
+ * by unstable clock.
+ */
+void tick_nohz_dep_set(enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear(enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tick_dep_mask);
+}
+
+/*
+ * Set per-CPU tick dependency. Used by scheduler and perf events in order to
+ * manage events throttling.
+ */
+void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	unsigned long prev;
+	struct tick_sched *ts;
+
+	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
+	if (!prev) {
+		preempt_disable();
+		/* Perf needs local kick that is NMI safe */
+		if (cpu == smp_processor_id()) {
+			tick_nohz_full_kick();
+		} else {
+			/* Remote irq work not NMI-safe */
+			if (!WARN_ON_ONCE(in_nmi()))
+				tick_nohz_full_kick_cpu(cpu);
+		}
+		preempt_enable();
+	}
+}
+
+void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
+
+	clear_bit(bit, &ts->tick_dep_mask);
+}
+
+/*
+ * Set a per-task tick dependency. Posix CPU timers need this in order to elapse
+ * per task timers.
+ */
+void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	/*
+	 * We could optimize this with just kicking the target running the task
+	 * if that noise matters for nohz full users.
+	 */
+	tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &tsk->tick_dep_mask);
+}
+
+/*
+ * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
+ * per process timers.
+ */
+void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+}
+
+void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+{
+	clear_bit(bit, &sig->tick_dep_mask);
+}
+
 /*
  * Re-evaluate the need for the tick as we switch the current task.
  * It might need the tick due to per task/process properties:
@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
 void __tick_nohz_task_switch(void)
 {
 	unsigned long flags;
+	struct tick_sched *ts;
 
 	local_irq_save(flags);
 
 	if (!tick_nohz_full_cpu(smp_processor_id()))
 		goto out;
 
-	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
-		tick_nohz_full_kick();
+	ts = this_cpu_ptr(&tick_cpu_sched);
 
+	if (ts->tick_stopped) {
+		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+			tick_nohz_full_kick();
+	}
 out:
 	local_irq_restore(flags);
 }
@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
-		trace_tick_stop(1, " ");
+		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
 	/*
@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 		return;
 
-	if (can_stop_full_tick())
+	if (can_stop_full_tick(ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index a4a8d4e9baa1..eb4e32566a83 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -60,6 +60,7 @@ struct tick_sched {
 	u64				next_timer;
 	ktime_t				idle_expires;
 	int				do_timer_last;
+	unsigned long			tick_dep_mask;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);
author	Ingo Molnar <mingo@kernel.org>	2016-03-08 15:17:54 +0300
committer	Ingo Molnar <mingo@kernel.org>	2016-03-08 15:17:54 +0300
commit	1f25184656a00a59e3a953189070d42a749f6aee (patch)
tree	c69dbf4f09a6aaaa54f8c962e3a029eb3715d0c9 /kernel
parent	e2857b8f11a289ed2b61d18d0665e05c1053c446 (diff)
parent	4f49b90abb4aca6fe677c95fc352fd0674d489bd (diff)
download	linux-1f25184656a00a59e3a953189070d42a749f6aee.tar.xz