diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-30 19:55:41 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-09-30 19:55:41 +0300 |
| commit | 755fa5b4fb36627796af19932a432d343220ec63 (patch) | |
| tree | 9936420452423ffcdc6df5188a65849770c7dce6 /include/linux | |
| parent | 77fc3f6696554a10cf7557c89bb3f1892ec29559 (diff) | |
| parent | 8f0fdbd4a06bf795c68bc9839d9c349ab592654f (diff) | |
| download | linux-755fa5b4fb36627796af19932a432d343220ec63.tar.xz | |
Merge tag 'cgroup-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
- Extensive cpuset code cleanup and refactoring work with no functional
changes: CPU mask computation logic refactoring, introducing new
helpers, removing redundant code paths, and improving error handling
for better maintainability.
- A few bug fixes to cpuset including fixes for partition creation
failures when isolcpus is in use, missing error returns, and null
pointer access prevention in free_tmpmasks().
- Core cgroup changes include replacing the global percpu_rwsem with
per-threadgroup rwsem when writing to cgroup.procs for better
scalability, workqueue conversions to use WQ_PERCPU and
system_percpu_wq to prepare for workqueue default switching from
percpu to unbound, and removal of unused code including the
post_attach callback.
- New cgroup.stat.local time accounting feature that tracks frozen time
duration.
- Misc changes including selftests updates (new freezer time tests and
backward compatibility fixes), documentation sync, string function
safety improvements, and 64-bit division fixes.
* tag 'cgroup-for-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (39 commits)
cpuset: remove is_prs_invalid helper
cpuset: remove impossible warning in update_parent_effective_cpumask
cpuset: remove redundant special case for null input in node mask update
cpuset: fix missing error return in update_cpumask
cpuset: Use new excpus for nocpu error check when enabling root partition
cpuset: fix failure to enable isolated partition when containing isolcpus
Documentation: cgroup-v2: Sync manual toctree
cpuset: use partition_cpus_change for setting exclusive cpus
cpuset: use parse_cpulist for setting cpus.exclusive
cpuset: introduce partition_cpus_change
cpuset: refactor cpus_allowed_validate_change
cpuset: refactor out validate_partition
cpuset: introduce cpus_excl_conflict and mems_excl_conflict helpers
cpuset: refactor CPU mask buffer parsing logic
cpuset: Refactor exclusive CPU mask computation logic
cpuset: change return type of is_partition_[in]valid to bool
cpuset: remove unused assignment to trialcs->partition_root_state
cpuset: move the root cpuset write check earlier
cgroup/cpuset: Remove redundant rcu_read_lock/unlock() in spin_lock
cgroup: Remove redundant rcu_read_lock/unlock() in spin_lock
...
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/cgroup-defs.h | 43 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 5 | ||||
| -rw-r--r-- | include/linux/sched/signal.h | 4 |
3 files changed, 50 insertions, 2 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6b93a64115fe..93318fce31f3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -91,6 +91,12 @@ enum { * cgroup_threadgroup_rwsem. This makes hot path operations such as * forks and exits into the slow path and more expensive. * + * Alleviate the contention between fork, exec, exit operations and + * writing to cgroup.procs by taking a per threadgroup rwsem instead of + * the global cgroup_threadgroup_rwsem. Fork and other operations + * from threads in different thread groups no longer contend with + * writing to cgroup.procs. + * * The static usage pattern of creating a cgroup, enabling controllers, * and then seeding it with CLONE_INTO_CGROUP doesn't require write * locking cgroup_threadgroup_rwsem and thus doesn't benefit from @@ -140,6 +146,17 @@ enum { __CFTYPE_ADDED = (1 << 18), }; +enum cgroup_attach_lock_mode { + /* Default */ + CGRP_ATTACH_LOCK_GLOBAL, + + /* When pid=0 && threadgroup=false, see comments in cgroup_procs_write_start */ + CGRP_ATTACH_LOCK_NONE, + + /* When favordynmods is on, see comments above CGRP_ROOT_FAVOR_DYNMODS */ + CGRP_ATTACH_LOCK_PER_THREADGROUP, +}; + /* * cgroup_file is the handle for a file instance created in a cgroup which * is used, for example, to generate file changed notifications. This can @@ -433,6 +450,23 @@ struct cgroup_freezer_state { * frozen, SIGSTOPped, and PTRACEd. */ int nr_frozen_tasks; + + /* Freeze time data consistency protection */ + seqcount_t freeze_seq; + + /* + * Most recent time the cgroup was requested to freeze. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 freeze_start_nsec; + + /* + * Total duration the cgroup has spent freezing. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 frozen_nsec; }; struct cgroup { @@ -746,7 +780,6 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); - void (*post_attach)(void); int (*can_fork)(struct task_struct *task, struct css_set *cset); void (*cancel_fork)(struct task_struct *task, struct css_set *cset); @@ -822,6 +855,7 @@ struct cgroup_subsys { }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +extern bool cgroup_enable_per_threadgroup_rwsem; struct cgroup_of_peak { unsigned long value; @@ -833,11 +867,14 @@ struct cgroup_of_peak { * @tsk: target task * * Allows cgroup operations to synchronize against threadgroup changes - * using a percpu_rw_semaphore. + * using a global percpu_rw_semaphore and a per threadgroup rw_semaphore when + * favordynmods is on. See the comment above CGRP_ROOT_FAVOR_DYNMODS definition. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { percpu_down_read(&cgroup_threadgroup_rwsem); + if (cgroup_enable_per_threadgroup_rwsem) + down_read(&tsk->signal->cgroup_threadgroup_rwsem); } /** @@ -848,6 +885,8 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) */ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { + if (cgroup_enable_per_threadgroup_rwsem) + up_read(&tsk->signal->cgroup_threadgroup_rwsem); percpu_up_read(&cgroup_threadgroup_rwsem); } diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bab98357960d..7e292bdff2c1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -355,6 +355,11 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return css->flags & CSS_DYING; } +static inline bool css_is_online(struct cgroup_subsys_state *css) +{ + return css->flags & CSS_ONLINE; +} + static inline bool css_is_self(struct cgroup_subsys_state *css) { if (css == &css->cgroup->self) { diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 1ef1edbaaf79..7d6449982822 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -226,6 +226,10 @@ struct signal_struct { struct tty_audit_buf *tty_audit_buf; #endif +#ifdef CONFIG_CGROUPS + struct rw_semaphore cgroup_threadgroup_rwsem; +#endif + /* * Thread is the potential origin of an oom condition; kill first on * oom |
