diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/fair.c | 80 |
1 files changed, 77 insertions, 3 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 159b20296dd5..accbfbbfa6a5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8447,6 +8447,11 @@ enum group_type { */ group_misfit_task, /* + * Balance SMT group that's fully busy. Can benefit from migration + * a task on SMT with busy sibling to another CPU on idle core. + */ + group_smt_balance, + /* * SD_ASYM_PACKING only: One local CPU with higher capacity is available, * and the task should be migrated to it instead of running on the * current CPU. @@ -9154,6 +9159,7 @@ struct sg_lb_stats { unsigned int group_weight; enum group_type group_type; unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ + unsigned int group_smt_balance; /* Task on busy SMT be moved */ unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; @@ -9427,6 +9433,9 @@ group_type group_classify(unsigned int imbalance_pct, if (sgs->group_asym_packing) return group_asym_packing; + if (sgs->group_smt_balance) + return group_smt_balance; + if (sgs->group_misfit_task_load) return group_misfit_task; @@ -9496,6 +9505,36 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu); } +/* One group has more than one SMT CPU while the other group does not */ +static inline bool smt_vs_nonsmt_groups(struct sched_group *sg1, + struct sched_group *sg2) +{ + if (!sg1 || !sg2) + return false; + + return (sg1->flags & SD_SHARE_CPUCAPACITY) != + (sg2->flags & SD_SHARE_CPUCAPACITY); +} + +static inline bool smt_balance(struct lb_env *env, struct sg_lb_stats *sgs, + struct sched_group *group) +{ + if (env->idle == CPU_NOT_IDLE) + return false; + + /* + * For SMT source group, it is better to move a task + * to a CPU that doesn't have multiple tasks sharing its CPU capacity. + * Note that if a group has a single SMT, SD_SHARE_CPUCAPACITY + * will not be on. + */ + if (group->flags & SD_SHARE_CPUCAPACITY && + sgs->sum_h_nr_running > 1) + return true; + + return false; +} + static inline bool sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) { @@ -9588,6 +9627,10 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->group_asym_packing = 1; } + /* Check for loaded SMT group to be balanced to dst CPU */ + if (!local_group && smt_balance(env, sgs, group)) + sgs->group_smt_balance = 1; + sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs); /* Computing avg_load makes sense only when group is overloaded */ @@ -9672,6 +9715,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, return false; break; + case group_smt_balance: case group_fully_busy: /* * Select the fully busy group with highest avg_load. In @@ -9701,6 +9745,18 @@ static bool update_sd_pick_busiest(struct lb_env *env, case group_has_spare: /* + * Do not pick sg with SMT CPUs over sg with pure CPUs, + * as we do not want to pull task off SMT core with one task + * and make the core idle. + */ + if (smt_vs_nonsmt_groups(sds->busiest, sg)) { + if (sg->flags & SD_SHARE_CPUCAPACITY && sgs->sum_h_nr_running <= 1) + return false; + else + return true; + } + + /* * Select not overloaded group with lowest number of idle cpus * and highest number of running tasks. We could also compare * the spare capacity which is more stable but it can end up @@ -9896,6 +9952,7 @@ static bool update_pick_idlest(struct sched_group *idlest, case group_imbalanced: case group_asym_packing: + case group_smt_balance: /* Those types are not used in the slow wakeup path */ return false; @@ -10027,6 +10084,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) case group_imbalanced: case group_asym_packing: + case group_smt_balance: /* Those type are not used in the slow wakeup path */ return NULL; @@ -10281,6 +10339,13 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s return; } + if (busiest->group_type == group_smt_balance) { + /* Reduce number of tasks sharing CPU capacity */ + env->migration_type = migrate_task; + env->imbalance = 1; + return; + } + if (busiest->group_type == group_imbalanced) { /* * In the group_imb case we cannot rely on group-wide averages @@ -10536,16 +10601,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env) goto force_balance; if (busiest->group_type != group_overloaded) { - if (env->idle == CPU_NOT_IDLE) + if (env->idle == CPU_NOT_IDLE) { /* * If the busiest group is not overloaded (and as a * result the local one too) but this CPU is already * busy, let another idle CPU try to pull task. */ goto out_balanced; + } + + if (busiest->group_type == group_smt_balance && + smt_vs_nonsmt_groups(sds.local, sds.busiest)) { + /* Let non SMT CPU pull from SMT CPU sharing with sibling */ + goto force_balance; + } if (busiest->group_weight > 1 && - local->idle_cpus <= (busiest->idle_cpus + 1)) + local->idle_cpus <= (busiest->idle_cpus + 1)) { /* * If the busiest group is not overloaded * and there is no imbalance between this and busiest @@ -10556,12 +10628,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env) * there is more than 1 CPU per group. */ goto out_balanced; + } - if (busiest->sum_h_nr_running == 1) + if (busiest->sum_h_nr_running == 1) { /* * busiest doesn't have any tasks waiting to run */ goto out_balanced; + } } force_balance: |