diff options
Diffstat (limited to 'kernel/cgroup/cpuset.c')
-rw-r--r-- | kernel/cgroup/cpuset.c | 63 |
1 files changed, 46 insertions, 17 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a29c0b13706b..636f1c682ac0 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1205,12 +1205,13 @@ void rebuild_sched_domains(void) /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed + * @new_cpus: the temp variable for the new effective_cpus mask * * Iterate through each task of @cs updating its cpus_allowed to the * effective cpuset's. As this function is called with cpuset_rwsem held, * cpuset membership stays stable. */ -static void update_tasks_cpumask(struct cpuset *cs) +static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) { struct css_task_iter it; struct task_struct *task; @@ -1224,7 +1225,10 @@ static void update_tasks_cpumask(struct cpuset *cs) if (top_cs && (task->flags & PF_KTHREAD) && kthread_is_per_cpu(task)) continue; - set_cpus_allowed_ptr(task, cs->effective_cpus); + + cpumask_and(new_cpus, cs->effective_cpus, + task_cpu_possible_mask(task)); + set_cpus_allowed_ptr(task, new_cpus); } css_task_iter_end(&it); } @@ -1267,7 +1271,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on); /** * update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset - * @cpuset: The cpuset that requests change in partition root state + * @cs: The cpuset that requests change in partition root state * @cmd: Partition root state change command * @newmask: Optional new cpumask for partcmd_update * @tmp: Temporary addmask and delmask @@ -1346,7 +1350,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, * A parent can be left with no CPU as long as there is no * task directly associated with the parent partition. */ - if (!cpumask_intersects(cs->cpus_allowed, parent->effective_cpus) && + if (cpumask_subset(parent->effective_cpus, cs->cpus_allowed) && partition_is_populated(parent, cs)) return PERR_NOCPUS; @@ -1509,7 +1513,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, spin_unlock_irq(&callback_lock); if (adding || deleting) - update_tasks_cpumask(parent); + update_tasks_cpumask(parent, tmp->new_cpus); /* * Set or clear CS_SCHED_LOAD_BALANCE when partcmd_update, if necessary. @@ -1661,7 +1665,7 @@ update_parent_subparts: WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); - update_tasks_cpumask(cp); + update_tasks_cpumask(cp, tmp->new_cpus); /* * On legacy hierarchy, if the effective cpumask of any non- @@ -2309,7 +2313,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) } } - update_tasks_cpumask(parent); + update_tasks_cpumask(parent, tmpmask.new_cpus); if (parent->child_ecpus_count) update_sibling_cpumasks(parent, cs, &tmpmask); @@ -2324,6 +2328,7 @@ out: new_prs = -new_prs; spin_lock_irq(&callback_lock); cs->partition_root_state = new_prs; + WRITE_ONCE(cs->prs_err, err); spin_unlock_irq(&callback_lock); /* * Update child cpusets, if present. @@ -3281,8 +3286,6 @@ struct cgroup_subsys cpuset_cgrp_subsys = { int __init cpuset_init(void) { - BUG_ON(percpu_init_rwsem(&cpuset_rwsem)); - BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL)); @@ -3347,7 +3350,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, * as the tasks will be migrated to an ancestor. */ if (cpus_updated && !cpumask_empty(cs->cpus_allowed)) - update_tasks_cpumask(cs); + update_tasks_cpumask(cs, new_cpus); if (mems_updated && !nodes_empty(cs->mems_allowed)) update_tasks_nodemask(cs); @@ -3384,7 +3387,7 @@ hotplug_update_tasks(struct cpuset *cs, spin_unlock_irq(&callback_lock); if (cpus_updated) - update_tasks_cpumask(cs); + update_tasks_cpumask(cs, new_cpus); if (mems_updated) update_tasks_nodemask(cs); } @@ -3691,15 +3694,38 @@ void __init cpuset_init_smp(void) * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty * subset of cpu_online_mask, even if this means going outside the - * tasks cpuset. + * tasks cpuset, except when the task is in the top cpuset. **/ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { unsigned long flags; + struct cpuset *cs; spin_lock_irqsave(&callback_lock, flags); - guarantee_online_cpus(tsk, pmask); + rcu_read_lock(); + + cs = task_cs(tsk); + if (cs != &top_cpuset) + guarantee_online_cpus(tsk, pmask); + /* + * Tasks in the top cpuset won't get update to their cpumasks + * when a hotplug online/offline event happens. So we include all + * offline cpus in the allowed cpu list. + */ + if ((cs == &top_cpuset) || cpumask_empty(pmask)) { + const struct cpumask *possible_mask = task_cpu_possible_mask(tsk); + + /* + * We first exclude cpus allocated to partitions. If there is no + * allowable online cpu left, we fall back to all possible cpus. + */ + cpumask_andnot(pmask, possible_mask, top_cpuset.subparts_cpus); + if (!cpumask_intersects(pmask, cpu_online_mask)) + cpumask_copy(pmask, possible_mask); + } + + rcu_read_unlock(); spin_unlock_irqrestore(&callback_lock, flags); } @@ -3879,8 +3905,7 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask) } /** - * cpuset_mem_spread_node() - On which node to begin search for a file page - * cpuset_slab_spread_node() - On which node to begin search for a slab page + * cpuset_spread_node() - On which node to begin search for a page * * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for * tasks in a cpuset with is_spread_page or is_spread_slab set), @@ -3904,12 +3929,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask) * is passed an offline node, it will fall back to the local node. * See kmem_cache_alloc_node(). */ - static int cpuset_spread_node(int *rotor) { return *rotor = next_node_in(*rotor, current->mems_allowed); } +/** + * cpuset_mem_spread_node() - On which node to begin search for a file page + */ int cpuset_mem_spread_node(void) { if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) @@ -3919,6 +3946,9 @@ int cpuset_mem_spread_node(void) return cpuset_spread_node(¤t->cpuset_mem_spread_rotor); } +/** + * cpuset_slab_spread_node() - On which node to begin search for a slab page + */ int cpuset_slab_spread_node(void) { if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE) @@ -3927,7 +3957,6 @@ int cpuset_slab_spread_node(void) return cpuset_spread_node(¤t->cpuset_slab_spread_rotor); } - EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); /** |