diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/core.c | 14 | ||||
-rw-r--r-- | kernel/sched/fair.c | 46 |
2 files changed, 55 insertions, 5 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ee28253c9ac0..8fccd8721bb8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4396,6 +4396,17 @@ void set_numabalancing_state(bool enabled) } #ifdef CONFIG_PROC_SYSCTL +static void reset_memory_tiering(void) +{ + struct pglist_data *pgdat; + + for_each_online_pgdat(pgdat) { + pgdat->nbp_threshold = 0; + pgdat->nbp_th_nr_cand = node_page_state(pgdat, PGPROMOTE_CANDIDATE); + pgdat->nbp_th_start = jiffies_to_msecs(jiffies); + } +} + int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -4412,6 +4423,9 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, if (err < 0) return err; if (write) { + if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) && + (state & NUMA_BALANCING_MEMORY_TIERING)) + reset_memory_tiering(); sysctl_numa_balancing_mode = state; __set_numabalancing_state(state); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1d1dd88daaab..d642e9ff2829 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1527,6 +1527,35 @@ static bool numa_promotion_rate_limit(struct pglist_data *pgdat, return false; } +#define NUMA_MIGRATION_ADJUST_STEPS 16 + +static void numa_promotion_adjust_threshold(struct pglist_data *pgdat, + unsigned long rate_limit, + unsigned int ref_th) +{ + unsigned int now, start, th_period, unit_th, th; + unsigned long nr_cand, ref_cand, diff_cand; + + now = jiffies_to_msecs(jiffies); + th_period = sysctl_numa_balancing_scan_period_max; + start = pgdat->nbp_th_start; + if (now - start > th_period && + cmpxchg(&pgdat->nbp_th_start, start, now) == start) { + ref_cand = rate_limit * + sysctl_numa_balancing_scan_period_max / MSEC_PER_SEC; + nr_cand = node_page_state(pgdat, PGPROMOTE_CANDIDATE); + diff_cand = nr_cand - pgdat->nbp_th_nr_cand; + unit_th = ref_th * 2 / NUMA_MIGRATION_ADJUST_STEPS; + th = pgdat->nbp_threshold ? : ref_th; + if (diff_cand > ref_cand * 11 / 10) + th = max(th - unit_th, unit_th); + else if (diff_cand < ref_cand * 9 / 10) + th = min(th + unit_th, ref_th * 2); + pgdat->nbp_th_nr_cand = nr_cand; + pgdat->nbp_threshold = th; + } +} + bool should_numa_migrate_memory(struct task_struct *p, struct page * page, int src_nid, int dst_cpu) { @@ -1541,19 +1570,26 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING && !node_is_toptier(src_nid)) { struct pglist_data *pgdat; - unsigned long rate_limit, latency, th; + unsigned long rate_limit; + unsigned int latency, th, def_th; pgdat = NODE_DATA(dst_nid); - if (pgdat_free_space_enough(pgdat)) + if (pgdat_free_space_enough(pgdat)) { + /* workload changed, reset hot threshold */ + pgdat->nbp_threshold = 0; return true; + } + + def_th = sysctl_numa_balancing_hot_threshold; + rate_limit = sysctl_numa_balancing_promote_rate_limit << \ + (20 - PAGE_SHIFT); + numa_promotion_adjust_threshold(pgdat, rate_limit, def_th); - th = sysctl_numa_balancing_hot_threshold; + th = pgdat->nbp_threshold ? : def_th; latency = numa_hint_fault_latency(page); if (latency >= th) return false; - rate_limit = sysctl_numa_balancing_promote_rate_limit << \ - (20 - PAGE_SHIFT); return !numa_promotion_rate_limit(pgdat, rate_limit, thp_nr_pages(page)); } |