diff options
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/sched.h | 41 |
-rw-r--r-- | include/linux/sched/prio.h | 40 |
-rw-r--r-- | include/linux/sched/rt.h | 19 |
3 files changed, 74 insertions, 26 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index a781dec1cd0b..c49a2585ff7d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3,6 +3,8 @@ #include <uapi/linux/sched.h> +#include <linux/sched/prio.h> + struct sched_param { int sched_priority; @@ -1077,6 +1079,7 @@ struct sched_entity { #endif #ifdef CONFIG_FAIR_GROUP_SCHED + int depth; struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq; @@ -1470,9 +1473,10 @@ struct task_struct { unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; - int numa_migrate_deferred; unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; struct callback_head numa_work; struct list_head numa_entry; @@ -1483,15 +1487,22 @@ struct task_struct { * Scheduling placement decisions are made based on the these counts. * The values remain static for the duration of a PTE scan */ - unsigned long *numa_faults; + unsigned long *numa_faults_memory; unsigned long total_numa_faults; /* * numa_faults_buffer records faults per node during the current - * scan window. When the scan completes, the counts in numa_faults - * decay and these values are copied. + * scan window. When the scan completes, the counts in + * numa_faults_memory decay and these values are copied. + */ + unsigned long *numa_faults_buffer_memory; + + /* + * Track the nodes the process was running on when a NUMA hinting + * fault was incurred. 
*/ - unsigned long *numa_faults_buffer; + unsigned long *numa_faults_cpu; + unsigned long *numa_faults_buffer_cpu; /* * numa_faults_locality tracks if faults recorded during the last @@ -1596,8 +1607,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p); - -extern unsigned int sysctl_numa_balancing_migrate_deferred; +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) @@ -1613,6 +1624,11 @@ static inline void set_numabalancing_state(bool enabled) static inline void task_numa_free(struct task_struct *p) { } +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} #endif static inline struct pid *task_pid(struct task_struct *task) @@ -2080,7 +2096,16 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { } extern bool yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); -extern int task_nice(const struct task_struct *p); +/** + * task_nice - return the nice value of a given task. + * @p: the task in question. + * + * Return: The nice value [ -20 ... 0 ... 19 ]. 
+ */ +static inline int task_nice(const struct task_struct *p) +{ + return PRIO_TO_NICE((p)->static_prio); +} extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h new file mode 100644 index 000000000000..410ccb74c9e6 --- /dev/null +++ b/include/linux/sched/prio.h @@ -0,0 +1,40 @@ +#ifndef _SCHED_PRIO_H +#define _SCHED_PRIO_H + +/* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH + * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority + * values are inverted: lower p->prio value means higher priority. + * + * The MAX_USER_RT_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + */ + +#define MAX_USER_RT_PRIO 100 +#define MAX_RT_PRIO MAX_USER_RT_PRIO + +#define MAX_PRIO (MAX_RT_PRIO + 40) +#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + +/* + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], + * and back. + */ +#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) +#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) + +/* + * 'User priority' is the nice value converted to something we + * can work with better when scaling various scheduler parameters, + * it's a [ 0 ... 39 ] range. 
+ */ +#define USER_PRIO(p) ((p)-MAX_RT_PRIO) +#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) +#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) + +#endif /* _SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 34e4ebea8fce..f7453d4c5613 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,24 +1,7 @@ #ifndef _SCHED_RT_H #define _SCHED_RT_H -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH - * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority - * values are inverted: lower p->prio value means higher priority. - * - * The MAX_USER_RT_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. - */ - -#define MAX_USER_RT_PRIO 100 -#define MAX_RT_PRIO MAX_USER_RT_PRIO - -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) +#include <linux/sched/prio.h> static inline int rt_prio(int prio) { |