summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-04-15 20:18:49 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-04-15 20:18:49 +0300
commit: b71f0be2d23d876648758d57bc6761500e3b9c70 (patch)
tree: 323b07b4f1234d912b86b958730eb6cf5088b9f3
parent: 05cef13fa80de8cec481ae5a015e58bc6340ca2d (diff)
parent: 3348e1e83a0f8a5ca1095843bc3316aaef7aae34 (diff)
download: linux-b71f0be2d23d876648758d57bc6761500e3b9c70.tar.xz
Merge tag 'cgroup-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:

 - cgroup_file_notify() locking converted from a global lock to
   per-cgroup_file spinlock with a lockless fast-path when no
   notification is needed

 - Misc changes including exposing cgroup helpers for sched_ext and
   minor fixes

* tag 'cgroup-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/rdma: fix swapped arguments in pr_warn() format string
  cgroup/dmem: remove region parameter from dmemcg_parse_limit
  cgroup: replace global cgroup_file_kn_lock with per-cgroup_file lock
  cgroup: add lockless fast-path checks to cgroup_file_notify()
  cgroup: reduce cgroup_file_kn_lock hold time in cgroup_file_notify()
  cgroup: Expose some cgroup helpers
-rw-r--r-- include/linux/cgroup-defs.h | 1
-rw-r--r-- include/linux/cgroup.h | 65
-rw-r--r-- kernel/cgroup/cgroup-internal.h | 6
-rw-r--r-- kernel/cgroup/cgroup.c | 108
-rw-r--r-- kernel/cgroup/dmem.c | 5
-rw-r--r-- kernel/cgroup/rdma.c | 2
6 files changed, 95 insertions, 92 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 7f87399938fa..f197ca104737 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -167,6 +167,7 @@ struct cgroup_file {
struct kernfs_node *kn;
unsigned long notified_at;
struct timer_list notify_timer;
+ spinlock_t lock;
};
/*
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bc892e3b37ee..e52160e85af4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -42,6 +42,14 @@ struct kernel_clone_args;
#ifdef CONFIG_CGROUPS
+/*
+ * To avoid confusing the compiler (and generating warnings) with code
+ * that attempts to access what would be a 0-element array (i.e. sized
+ * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
+ * constant expression can be added.
+ */
+#define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0)
+
enum css_task_iter_flags {
CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */
CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */
@@ -76,6 +84,7 @@ enum cgroup_lifetime_events {
extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
+extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
extern struct blocking_notifier_head cgroup_lifetime_notifier;
@@ -103,6 +112,8 @@ extern struct blocking_notifier_head cgroup_lifetime_notifier;
#define cgroup_subsys_on_dfl(ss) \
static_branch_likely(&ss ## _on_dfl_key)
+bool cgroup_on_dfl(const struct cgroup *cgrp);
+
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
@@ -274,6 +285,32 @@ void css_task_iter_end(struct css_task_iter *it);
for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
(pos) = css_next_descendant_post((pos), (css)))
+/* iterate over child cgrps, lock should be held throughout iteration */
+#define cgroup_for_each_live_child(child, cgrp) \
+ list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
+ if (({ lockdep_assert_held(&cgroup_mutex); \
+ cgroup_is_dead(child); })) \
+ ; \
+ else
+
+/* walk live descendants in pre order */
+#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \
+ css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \
+ if (({ lockdep_assert_held(&cgroup_mutex); \
+ (dsct) = (d_css)->cgroup; \
+ cgroup_is_dead(dsct); })) \
+ ; \
+ else
+
+/* walk live descendants in postorder */
+#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \
+ css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
+ if (({ lockdep_assert_held(&cgroup_mutex); \
+ (dsct) = (d_css)->cgroup; \
+ cgroup_is_dead(dsct); })) \
+ ; \
+ else
+
/**
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
@@ -337,6 +374,27 @@ static inline u64 cgroup_id(const struct cgroup *cgrp)
}
/**
+ * cgroup_css - obtain a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
+ *
+ * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This
+ * function must be called either under cgroup_mutex or rcu_read_lock() and
+ * the caller is responsible for pinning the returned css if it wants to
+ * keep accessing it outside the said locks. This function may return
+ * %NULL if @cgrp doesn't have @ss enabled.
+ */
+static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
+ struct cgroup_subsys *ss)
+{
+ if (CGROUP_HAS_SUBSYS_CONFIG && ss)
+ return rcu_dereference_check(cgrp->subsys[ss->id],
+ lockdep_is_held(&cgroup_mutex));
+ else
+ return &cgrp->self;
+}
+
+/**
* css_is_dying - test whether the specified css is dying
* @css: target css
*
@@ -372,6 +430,11 @@ static inline bool css_is_self(struct cgroup_subsys_state *css)
return false;
}
+static inline bool cgroup_is_dead(const struct cgroup *cgrp)
+{
+ return !(cgrp->self.flags & CSS_ONLINE);
+}
+
static inline void cgroup_get(struct cgroup *cgrp)
{
css_get(&cgrp->self);
@@ -387,8 +450,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
css_put(&cgrp->self);
}
-extern struct mutex cgroup_mutex;
-
static inline void cgroup_lock(void)
{
mutex_lock(&cgroup_mutex);
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 3bfe37693d68..58797123b752 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -184,11 +184,6 @@ extern bool cgrp_dfl_visible;
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \
(((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
-static inline bool cgroup_is_dead(const struct cgroup *cgrp)
-{
- return !(cgrp->self.flags & CSS_ONLINE);
-}
-
static inline bool notify_on_release(const struct cgroup *cgrp)
{
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -222,7 +217,6 @@ static inline void get_css_set(struct css_set *cset)
}
bool cgroup_ssid_enabled(int ssid);
-bool cgroup_on_dfl(const struct cgroup *cgrp);
struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root);
struct cgroup *task_cgroup_from_root(struct task_struct *task,
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4ca3cb993da2..1f084ee71443 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -69,14 +69,6 @@
#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
/*
- * To avoid confusing the compiler (and generating warnings) with code
- * that attempts to access what would be a 0-element array (i.e. sized
- * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
- * constant expression can be added.
- */
-#define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0)
-
-/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
* hierarchy must be performed while holding it.
*
@@ -107,12 +99,6 @@ static bool cgroup_debug __read_mostly;
*/
static DEFINE_SPINLOCK(cgroup_idr_lock);
-/*
- * Protects cgroup_file->kn for !self csses. It synchronizes notifications
- * against file removal/re-creation across css hiding.
- */
-static DEFINE_SPINLOCK(cgroup_file_kn_lock);
-
DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
#define cgroup_assert_mutex_or_rcu_locked() \
@@ -510,27 +496,6 @@ static u32 cgroup_ss_mask(struct cgroup *cgrp)
}
/**
- * cgroup_css - obtain a cgroup's css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns @cgrp->self)
- *
- * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This
- * function must be called either under cgroup_mutex or rcu_read_lock() and
- * the caller is responsible for pinning the returned css if it wants to
- * keep accessing it outside the said locks. This function may return
- * %NULL if @cgrp doesn't have @subsys_id enabled.
- */
-static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
- struct cgroup_subsys *ss)
-{
- if (CGROUP_HAS_SUBSYS_CONFIG && ss)
- return rcu_dereference_check(cgrp->subsys[ss->id],
- lockdep_is_held(&cgroup_mutex));
- else
- return &cgrp->self;
-}
-
-/**
* cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
@@ -741,32 +706,6 @@ EXPORT_SYMBOL_GPL(of_css);
} \
} while (false)
-/* iterate over child cgrps, lock should be held throughout iteration */
-#define cgroup_for_each_live_child(child, cgrp) \
- list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
- if (({ lockdep_assert_held(&cgroup_mutex); \
- cgroup_is_dead(child); })) \
- ; \
- else
-
-/* walk live descendants in pre order */
-#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \
- css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \
- if (({ lockdep_assert_held(&cgroup_mutex); \
- (dsct) = (d_css)->cgroup; \
- cgroup_is_dead(dsct); })) \
- ; \
- else
-
-/* walk live descendants in postorder */
-#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \
- css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
- if (({ lockdep_assert_held(&cgroup_mutex); \
- (dsct) = (d_css)->cgroup; \
- cgroup_is_dead(dsct); })) \
- ; \
- else
-
/*
* The default css_set - used by init and its children prior to any
* hierarchies being mounted. It contains a pointer to the root state
@@ -1748,9 +1687,9 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
struct cgroup_file *cfile = (void *)css + cft->file_offset;
- spin_lock_irq(&cgroup_file_kn_lock);
- cfile->kn = NULL;
- spin_unlock_irq(&cgroup_file_kn_lock);
+ spin_lock_irq(&cfile->lock);
+ WRITE_ONCE(cfile->kn, NULL);
+ spin_unlock_irq(&cfile->lock);
timer_delete_sync(&cfile->notify_timer);
}
@@ -4429,10 +4368,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
struct cgroup_file *cfile = (void *)css + cft->file_offset;
timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
-
- spin_lock_irq(&cgroup_file_kn_lock);
+ spin_lock_init(&cfile->lock);
cfile->kn = kn;
- spin_unlock_irq(&cgroup_file_kn_lock);
}
return 0;
@@ -4687,21 +4624,32 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
*/
void cgroup_file_notify(struct cgroup_file *cfile)
{
- unsigned long flags;
+ unsigned long flags, last, next;
+ struct kernfs_node *kn = NULL;
- spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+ if (!READ_ONCE(cfile->kn))
+ return;
+
+ last = READ_ONCE(cfile->notified_at);
+ next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
+ if (time_in_range(jiffies, last, next)) {
+ timer_reduce(&cfile->notify_timer, next);
+ if (timer_pending(&cfile->notify_timer))
+ return;
+ }
+
+ spin_lock_irqsave(&cfile->lock, flags);
if (cfile->kn) {
- unsigned long last = cfile->notified_at;
- unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
+ kn = cfile->kn;
+ kernfs_get(kn);
+ WRITE_ONCE(cfile->notified_at, jiffies);
+ }
+ spin_unlock_irqrestore(&cfile->lock, flags);
- if (time_in_range(jiffies, last, next)) {
- timer_reduce(&cfile->notify_timer, next);
- } else {
- kernfs_notify(cfile->kn);
- cfile->notified_at = jiffies;
- }
+ if (kn) {
+ kernfs_notify(kn);
+ kernfs_put(kn);
}
- spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
}
EXPORT_SYMBOL_GPL(cgroup_file_notify);
@@ -4714,10 +4662,10 @@ void cgroup_file_show(struct cgroup_file *cfile, bool show)
{
struct kernfs_node *kn;
- spin_lock_irq(&cgroup_file_kn_lock);
+ spin_lock_irq(&cfile->lock);
kn = cfile->kn;
kernfs_get(kn);
- spin_unlock_irq(&cgroup_file_kn_lock);
+ spin_unlock_irq(&cfile->lock);
if (kn)
kernfs_show(kn, show);
diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c
index 9d95824dc6fa..1ab1fb47f271 100644
--- a/kernel/cgroup/dmem.c
+++ b/kernel/cgroup/dmem.c
@@ -707,8 +707,7 @@ static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
return 0;
}
-static int dmemcg_parse_limit(char *options, struct dmem_cgroup_region *region,
- u64 *new_limit)
+static int dmemcg_parse_limit(char *options, u64 *new_limit)
{
char *end;
@@ -762,7 +761,7 @@ static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
if (!region)
return -EINVAL;
- err = dmemcg_parse_limit(options, region, &new_limit);
+ err = dmemcg_parse_limit(options, &new_limit);
if (err < 0)
goto out_put;
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index 09258eebb5c7..9967fb25c563 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -173,7 +173,7 @@ uncharge_cg_locked(struct rdma_cgroup *cg,
* the system.
*/
if (unlikely(!rpool)) {
- pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device);
+ pr_warn("Invalid device %p or rdma cgroup %p\n", device, cg);
return;
}