diff options
| -rw-r--r-- | include/linux/cpu.h | 2 | ||||
| -rw-r--r-- | include/linux/sched.h | 3 | ||||
| -rw-r--r-- | kernel/fork.c | 2 | ||||
| -rw-r--r-- | kernel/sched/core.c | 1 | ||||
| -rw-r--r-- | kernel/sched/idle.c | 162 | 
5 files changed, 107 insertions, 63 deletions
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index b886dc17f2f3..ac0efae38072 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -245,6 +245,8 @@ void arch_cpu_idle_dead(void);
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
+void play_idle(unsigned long duration_ms);
+
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
 bool cpu_report_death(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..114c7fcb6af6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2254,6 +2254,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 /*
  * Per process flags
  */
+#define PF_IDLE		0x00000002	/* I am an IDLE thread */
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
@@ -2609,7 +2610,7 @@ extern struct task_struct *idle_task(int cpu);
  */
 static inline bool is_idle_task(const struct task_struct *p)
 {
-	return p->pid == 0;
+	return !!(p->flags & PF_IDLE);
 }
 extern struct task_struct *curr_task(int cpu);
 extern void ia64_set_curr_task(int cpu, struct task_struct *p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259fc794d..5074b2f0827b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1537,7 +1537,7 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cleanup_count;
 
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
-	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
+	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
 	p->flags |= PF_FORKNOEXEC;
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 94732d1ab00a..63b3a8a49884 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5285,6 +5285,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	__sched_fork(0, idle);
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
+	idle->flags |= PF_IDLE;
 
 	kasan_unpoison_task_stack(idle);
 
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 513e4dfeeae7..6a4bae0a649d 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -205,76 +205,65 @@ exit_idle:
  *
  * Called with polling cleared.
  */
-static void cpu_idle_loop(void)
+static void do_idle(void)
 {
-	int cpu = smp_processor_id();
-
-	while (1) {
-		/*
-		 * If the arch has a polling bit, we maintain an invariant:
-		 *
-		 * Our polling bit is clear if we're not scheduled (i.e. if
-		 * rq->curr != rq->idle).  This means that, if rq->idle has
-		 * the polling bit set, then setting need_resched is
-		 * guaranteed to cause the cpu to reschedule.
-		 */
-
-		__current_set_polling();
-		quiet_vmstat();
-		tick_nohz_idle_enter();
+	/*
+	 * If the arch has a polling bit, we maintain an invariant:
+	 *
+	 * Our polling bit is clear if we're not scheduled (i.e. if rq->curr !=
+	 * rq->idle). This means that, if rq->idle has the polling bit set,
+	 * then setting need_resched is guaranteed to cause the CPU to
+	 * reschedule.
+	 */
 
-		while (!need_resched()) {
-			check_pgt_cache();
-			rmb();
+	__current_set_polling();
+	tick_nohz_idle_enter();
 
-			if (cpu_is_offline(cpu)) {
-				cpuhp_report_idle_dead();
-				arch_cpu_idle_dead();
-			}
+	while (!need_resched()) {
+		check_pgt_cache();
+		rmb();
 
-			local_irq_disable();
-			arch_cpu_idle_enter();
-
-			/*
-			 * In poll mode we reenable interrupts and spin.
-			 *
-			 * Also if we detected in the wakeup from idle
-			 * path that the tick broadcast device expired
-			 * for us, we don't want to go deep idle as we
-			 * know that the IPI is going to arrive right
-			 * away
-			 */
-			if (cpu_idle_force_poll || tick_check_broadcast_expired())
-				cpu_idle_poll();
-			else
-				cpuidle_idle_call();
-
-			arch_cpu_idle_exit();
+		if (cpu_is_offline(smp_processor_id())) {
+			cpuhp_report_idle_dead();
+			arch_cpu_idle_dead();
 		}
 
-		/*
-		 * Since we fell out of the loop above, we know
-		 * TIF_NEED_RESCHED must be set, propagate it into
-		 * PREEMPT_NEED_RESCHED.
-		 *
-		 * This is required because for polling idle loops we will
-		 * not have had an IPI to fold the state for us.
-		 */
-		preempt_set_need_resched();
-		tick_nohz_idle_exit();
-		__current_clr_polling();
+		local_irq_disable();
+		arch_cpu_idle_enter();
 
 		/*
-		 * We promise to call sched_ttwu_pending and reschedule
-		 * if need_resched is set while polling is set.  That
-		 * means that clearing polling needs to be visible
-		 * before doing these things.
+		 * In poll mode we reenable interrupts and spin. Also if we
+		 * detected in the wakeup from idle path that the tick
+		 * broadcast device expired for us, we don't want to go deep
+		 * idle as we know that the IPI is going to arrive right away.
 		 */
-		smp_mb__after_atomic();
-
-		sched_ttwu_pending();
-		schedule_preempt_disabled();
+		if (cpu_idle_force_poll || tick_check_broadcast_expired())
+			cpu_idle_poll();
+		else
+			cpuidle_idle_call();
+		arch_cpu_idle_exit();
 	}
+
+	/*
+	 * Since we fell out of the loop above, we know TIF_NEED_RESCHED must
+	 * be set, propagate it into PREEMPT_NEED_RESCHED.
+	 *
+	 * This is required because for polling idle loops we will not have had
+	 * an IPI to fold the state for us.
+	 */
+	preempt_set_need_resched();
+	tick_nohz_idle_exit();
+	__current_clr_polling();
+
+	/*
+	 * We promise to call sched_ttwu_pending() and reschedule if
+	 * need_resched() is set while polling is set. That means that clearing
+	 * polling needs to be visible before doing these things.
+	 */
+	smp_mb__after_atomic();
+
+	sched_ttwu_pending();
+	schedule_preempt_disabled();
 }
 
 bool cpu_in_idle(unsigned long pc)
@@ -283,6 +272,56 @@ bool cpu_in_idle(unsigned long pc)
 		pc < (unsigned long)__cpuidle_text_end;
 }
 
+struct idle_timer {
+	struct hrtimer timer;
+	int done;
+};
+
+static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
+{
+	struct idle_timer *it = container_of(timer, struct idle_timer, timer);
+
+	WRITE_ONCE(it->done, 1);
+	set_tsk_need_resched(current);
+
+	return HRTIMER_NORESTART;
+}
+
+void play_idle(unsigned long duration_ms)
+{
+	struct idle_timer it;
+
+	/*
+	 * Only FIFO tasks can disable the tick since they don't need the forced
+	 * preemption.
+	 */
+	WARN_ON_ONCE(current->policy != SCHED_FIFO);
+	WARN_ON_ONCE(current->nr_cpus_allowed != 1);
+	WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
+	WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
+	WARN_ON_ONCE(!duration_ms);
+
+	rcu_sleep_check();
+	preempt_disable();
+	current->flags |= PF_IDLE;
+	cpuidle_use_deepest_state(true);
+
+	it.done = 0;
+	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	it.timer.function = idle_inject_timer_fn;
+	hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED);
+
+	while (!READ_ONCE(it.done))
+		do_idle();
+
+	cpuidle_use_deepest_state(false);
+	current->flags &= ~PF_IDLE;
+
+	preempt_fold_need_resched();
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(play_idle);
+
 void cpu_startup_entry(enum cpuhp_state state)
 {
 	/*
@@ -302,5 +341,6 @@ void cpu_startup_entry(enum cpuhp_state state)
 #endif
 	arch_cpu_idle_prepare();
 	cpuhp_online_idle(state);
-	cpu_idle_loop();
+	while (1)
+		do_idle();
 }
