diff options
Diffstat (limited to 'include/linux/sched.h')
| -rw-r--r-- | include/linux/sched.h | 177 | 
1 files changed, 157 insertions, 20 deletions
| diff --git a/include/linux/sched.h b/include/linux/sched.h index 52c4847b05e2..2c036de6c1ee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -40,7 +40,6 @@ struct sched_param {  #include <linux/pid.h>  #include <linux/percpu.h>  #include <linux/topology.h> -#include <linux/proportions.h>  #include <linux/seccomp.h>  #include <linux/rcupdate.h>  #include <linux/rculist.h> @@ -178,9 +177,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);  extern void calc_global_load(unsigned long ticks);  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void update_cpu_load_nohz(int active); +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void);  #else -static inline void update_cpu_load_nohz(int active) { } +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { }  #endif  extern void dump_cpu_task(int cpu); @@ -372,6 +373,15 @@ extern void cpu_init (void);  extern void trap_init(void);  extern void update_process_times(int user);  extern void scheduler_tick(void); +extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying	NULL +#endif  extern void sched_show_task(struct task_struct *p); @@ -511,6 +521,7 @@ static inline int get_dumpable(struct mm_struct *mm)  #define MMF_HAS_UPROBES		19	/* has uprobes */  #define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_REAPED		21	/* mm has been already reaped */  #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) @@ -658,6 +669,7 @@ struct signal_struct {  	atomic_t		sigcnt;  	atomic_t		live;  	int			nr_threads; +	atomic_t oom_victims; /* # of TIF_MEDIE threads in this thread group */  	struct list_head	thread_head;  	wait_queue_head_t	
wait_chldexit;	/* for wait4() */ @@ -935,9 +947,19 @@ enum cpu_idle_type {  };  /* + * Integer metrics need fixed point arithmetic, e.g., sched/fair + * has a few: load, load_avg, util_avg, freq, and capacity. + * + * We define a basic fixed point arithmetic range, and then formalize + * all these metrics based on that basic range. + */ +# define SCHED_FIXEDPOINT_SHIFT	10 +# define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT) + +/*   * Increase resolution of cpu_capacity calculations   */ -#define SCHED_CAPACITY_SHIFT	10 +#define SCHED_CAPACITY_SHIFT	SCHED_FIXEDPOINT_SHIFT  #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)  /* @@ -1199,18 +1221,56 @@ struct load_weight {  };  /* - * The load_avg/util_avg accumulates an infinite geometric series. - * 1) load_avg factors frequency scaling into the amount of time that a - * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the - * aggregated such weights of all runnable and blocked sched_entities. - * 2) util_avg factors frequency and cpu scaling into the amount of time - * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. - * For cfs_rq, it is the aggregated such times of all runnable and + * The load_avg/util_avg accumulates an infinite geometric series + * (see __update_load_avg() in kernel/sched/fair.c). + * + * [load_avg definition] + * + *   load_avg = runnable% * scale_load_down(load) + * + * where runnable% is the time ratio that a sched_entity is runnable. + * For cfs_rq, it is the aggregated load_avg of all runnable and   * blocked sched_entities. 
- * The 64 bit load_sum can: - * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with - * the highest weight (=88761) always runnable, we should not overflow - * 2) for entity, support any load.weight always runnable + * + * load_avg may also take frequency scaling into account: + * + *   load_avg = runnable% * scale_load_down(load) * freq% + * + * where freq% is the CPU frequency normalized to the highest frequency. + * + * [util_avg definition] + * + *   util_avg = running% * SCHED_CAPACITY_SCALE + * + * where running% is the time ratio that a sched_entity is running on + * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable + * and blocked sched_entities. + * + * util_avg may also factor frequency scaling and CPU capacity scaling: + * + *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity% + * + * where freq% is the same as above, and capacity% is the CPU capacity + * normalized to the greatest capacity (due to uarch differences, etc). + * + * N.B., the above ratios (runnable%, running%, freq%, and capacity%) + * themselves are in the range of [0, 1]. To do fixed point arithmetics, + * we therefore scale them to as large a range as necessary. This is for + * example reflected by util_avg's SCHED_CAPACITY_SCALE. + * + * [Overflow issue] + * + * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities + * with the highest load (=88761), always runnable on a single cfs_rq, + * and should not overflow as the number already hits PID_MAX_LIMIT. + * + * For all other cases (including 32-bit kernels), struct load_weight's + * weight will overflow first before we do, because: + * + *    Max(load_avg) <= Max(load.weight) + * + * Then it is the load_weight's responsibility to consider overflow + * issues.   
*/  struct sched_avg {  	u64 last_update_time, load_sum; @@ -1596,6 +1656,7 @@ struct task_struct {  	unsigned long sas_ss_sp;  	size_t sas_ss_size; +	unsigned sas_ss_flags;  	struct callback_head *task_works; @@ -1871,6 +1932,11 @@ extern int arch_task_struct_size __read_mostly;  /* Future-safe accessor for struct task_struct's cpus_allowed. */  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) +static inline int tsk_nr_cpus_allowed(struct task_struct *p) +{ +	return p->nr_cpus_allowed; +} +  #define TNF_MIGRATED	0x01  #define TNF_NO_GROUP	0x02  #define TNF_SHARED	0x04 @@ -2184,6 +2250,7 @@ static inline void memalloc_noio_restore(unsigned int flags)  #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */  #define PFA_SPREAD_PAGE  1      /* Spread page cache over cpuset */  #define PFA_SPREAD_SLAB  2      /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING  3      /* Lowmemorykiller is waiting */  #define TASK_PFA_TEST(name, func)					\ @@ -2207,6 +2274,9 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)  TASK_PFA_SET(SPREAD_SLAB, spread_slab)  TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(LMK_WAITING, lmk_waiting) +TASK_PFA_SET(LMK_WAITING, lmk_waiting) +  /*   * task->jobctl flags   */ @@ -2303,8 +2373,6 @@ extern unsigned long long notrace sched_clock(void);  /*   * See the comment in kernel/sched/clock.c   */ -extern u64 cpu_clock(int cpu); -extern u64 local_clock(void);  extern u64 running_clock(void);  extern u64 sched_clock_cpu(int cpu); @@ -2323,6 +2391,16 @@ static inline void sched_clock_idle_sleep_event(void)  static inline void sched_clock_idle_wakeup_event(u64 delta_ns)  {  } + +static inline u64 cpu_clock(int cpu) +{ +	return sched_clock(); +} + +static inline u64 local_clock(void) +{ +	return sched_clock(); +}  #else  /*   * Architectures can set this to 1 if they have specified @@ -2337,6 +2415,26 @@ extern void clear_sched_clock_stable(void);  extern void sched_clock_tick(void);  extern void 
sched_clock_idle_sleep_event(void);  extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !!                                                  # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ +	return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ +	return sched_clock_cpu(raw_smp_processor_id()); +}  #endif  #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -2575,6 +2673,18 @@ static inline int kill_cad_pid(int sig, int priv)   */  static inline int on_sig_stack(unsigned long sp)  { +	/* +	 * If the signal stack is SS_AUTODISARM then, by construction, we +	 * can't be on the signal stack unless user code deliberately set +	 * SS_AUTODISARM when we were already on it. +	 * +	 * This improves reliability: if user state gets corrupted such that +	 * the stack pointer points very close to the end of the signal stack, +	 * then this check will enable the signal to be handled anyway. +	 */ +	if (current->sas_ss_flags & SS_AUTODISARM) +		return 0; +  #ifdef CONFIG_STACK_GROWSUP  	return sp >= current->sas_ss_sp &&  		sp - current->sas_ss_sp < current->sas_ss_size; @@ -2592,6 +2702,13 @@ static inline int sas_ss_flags(unsigned long sp)  	return on_sig_stack(sp) ? SS_ONSTACK : 0;  } +static inline void sas_ss_reset(struct task_struct *p) +{ +	p->sas_ss_sp = 0; +	p->sas_ss_size = 0; +	p->sas_ss_flags = SS_DISABLE; +} +  static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)  {  	if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! 
sas_ss_flags(sp)) @@ -2610,14 +2727,24 @@ extern struct mm_struct * mm_alloc(void);  /* mmdrop drops the mm and the page tables */  extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct * mm) +static inline void mmdrop(struct mm_struct *mm)  {  	if (unlikely(atomic_dec_and_test(&mm->mm_count)))  		__mmdrop(mm);  } +static inline bool mmget_not_zero(struct mm_struct *mm) +{ +	return atomic_inc_not_zero(&mm->mm_users); +} +  /* mmput gets rid of the mappings and all user-space */  extern void mmput(struct mm_struct *); +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); +  /* Grab a reference to a task's mm, if it is not already going away */  extern struct mm_struct *get_task_mm(struct task_struct *task);  /* @@ -2646,7 +2773,14 @@ static inline int copy_thread_tls(  }  #endif  extern void flush_thread(void); -extern void exit_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD +extern void exit_thread(struct task_struct *tsk); +#else +static inline void exit_thread(struct task_struct *tsk) +{ +} +#endif  extern void exit_files(struct task_struct *);  extern void __cleanup_sighand(struct sighand_struct *); @@ -3240,7 +3374,10 @@ struct update_util_data {  		     u64 time, unsigned long util, unsigned long max);  }; -void cpufreq_set_update_util_data(int cpu, struct update_util_data *data); +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, +			void (*func)(struct update_util_data *data, u64 time, +				     unsigned long util, unsigned long max)); +void cpufreq_remove_update_util_hook(int cpu);  #endif /* CONFIG_CPU_FREQ */  #endif | 
