diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-11 02:03:02 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-01-11 02:03:02 +0300 |
commit | 58624e4bc876198a5dc41be1d7dd39e7c944b9c6 (patch) | |
tree | a6c7b7377869c24ff7741203b73ce209b7c43276 | |
parent | 257a8be4e9a6fc3e821c337275256416750afa5b (diff) | |
parent | 3cb97a927fffe443e1e7e8eddbfebfdb062e86ed (diff) | |
download | linux-58624e4bc876198a5dc41be1d7dd39e7c944b9c6.tar.xz |
Merge tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
"Cpuset fixes:
- Fix isolated CPUs leaking into sched domains
- Remove now unnecessary kernfs active break which can trigger a
warning
- Comment updates"
* tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup/cpuset: remove kernfs active break
cgroup/cpuset: Prevent leakage of isolated CPUs into sched domains
cgroup/cpuset: Remove stale text
-rw-r--r-- | kernel/cgroup/cpuset.c | 44 | ||||
-rwxr-xr-x | tools/testing/selftests/cgroup/test_cpuset_prs.sh | 33 |
2 files changed, 30 insertions, 47 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f321ed515f3a..0f910c828973 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -197,10 +197,8 @@ static struct cpuset top_cpuset = { /* * There are two global locks guarding cpuset structures - cpuset_mutex and - * callback_lock. We also require taking task_lock() when dereferencing a - * task's cpuset pointer. See "The task_lock() exception", at the end of this - * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems - * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset + * callback_lock. The cpuset code uses only cpuset_mutex. Other kernel + * subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset * structures. Note that cpuset_mutex needs to be a mutex as it is used in * paths that rely on priority inheritance (e.g. scheduler - on RT) for * correctness. @@ -229,9 +227,6 @@ static struct cpuset top_cpuset = { * The cpuset_common_seq_show() handlers only hold callback_lock across * small pieces of code, such as when reading out possibly multi-word * cpumasks and nodemasks. - * - * Accessing a task's cpuset should be done in accordance with the - * guidelines for accessing subsystem state in kernel/cgroup.c */ static DEFINE_MUTEX(cpuset_mutex); @@ -890,7 +885,15 @@ v2: */ if (cgrpv2) { for (i = 0; i < ndoms; i++) { - cpumask_copy(doms[i], csa[i]->effective_cpus); + /* + * The top cpuset may contain some boot time isolated + * CPUs that need to be excluded from the sched domain. + */ + if (csa[i] == &top_cpuset) + cpumask_and(doms[i], csa[i]->effective_cpus, + housekeeping_cpumask(HK_TYPE_DOMAIN)); + else + cpumask_copy(doms[i], csa[i]->effective_cpus); if (dattr) dattr[i] = SD_ATTR_INIT; } @@ -3121,29 +3124,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of, int retval = -ENODEV; buf = strstrip(buf); - - /* - * CPU or memory hotunplug may leave @cs w/o any execution - * resources, in which case the hotplug code asynchronously updates - * configuration and transfers all tasks to the nearest ancestor - * which can execute. - * - * As writes to "cpus" or "mems" may restore @cs's execution - * resources, wait for the previously scheduled operations before - * proceeding, so that we don't end up keep removing tasks added - * after execution capability is restored. - * - * cpuset_handle_hotplug may call back into cgroup core asynchronously - * via cgroup_transfer_tasks() and waiting for it from a cgroupfs - * operation like this one can lead to a deadlock through kernfs - * active_ref protection. Let's break the protection. Losing the - * protection is okay as we check whether @cs is online after - * grabbing cpuset_mutex anyway. This only happens on the legacy - * hierarchies. - */ - css_get(&cs->css); - kernfs_break_active_protection(of->kn); - cpus_read_lock(); mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) @@ -3176,8 +3156,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of, out_unlock: mutex_unlock(&cpuset_mutex); cpus_read_unlock(); - kernfs_unbreak_active_protection(of->kn); - css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); return retval ?: nbytes; } diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 03c1bdaed2c3..400a696a0d21 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus # # If isolated CPUs have been reserved at boot time (as shown in -# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7 +# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These isolated CPUs will also be removed -# before being compared with the expected results. +# the tests may fail incorrectly. These pre-isolated CPUs should stay in +# an isolated state throughout the testing process for now. # BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) if [[ -n "$BOOT_ISOLCPUS" ]] then - [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] && + [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi @@ -684,14 +684,18 @@ check_isolcpus() fi # + # Appending pre-isolated CPUs + # Even though CPU #8 isn't used for testing, it can't be pre-isolated + # to make appending those CPUs easier. + # + [[ -n "$BOOT_ISOLCPUS" ]] && { + EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} + EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + } + + # # Check cpuset.cpus.isolated cpumask # - if [[ -z "$BOOT_ISOLCPUS" ]] - then - ISOLCPUS=$(cat $ISCPUS) - else - ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") - fi [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 @@ -731,8 +735,6 @@ check_isolcpus() fi done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ -n "BOOT_ISOLCPUS" ]] && - ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] } @@ -836,8 +838,11 @@ run_state_test() # if available [[ -n "$ICPUS" ]] && { check_isolcpus $ICPUS - [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} + test_fail $I "isolated CPU" \ + "Expect $ICPUS, get $ISOLCPUS instead" + } } reset_cgroup_states # |