diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 14:03:56 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 14:03:56 +0300 |
| commit | 83476cc97bc635a3ff502bd194c79bfb1f1ae050 (patch) | |
| tree | efa273a93be9a4480b575a6c2d46e5c201b109e9 | |
| parent | d4d9d39f046012ff330e81dcd9b1beadf3759f7e (diff) | |
| parent | a99ce697ea5e27b867c9ba4ee55fa5ba3b8d1188 (diff) | |
| download | linux-83476cc97bc635a3ff502bd194c79bfb1f1ae050.tar.xz | |
Merge tag 'cgroup-for-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
- Last cycle deferred css teardown on cgroup removal until the cgroup
depopulated, so a css is not taken offline while tasks can still
reference it. Disabling a controller through cgroup.subtree_control
still had the same problem. This reworks the deferral from per-cgroup
to per-css so that path is covered too.
- New RDMA controller monitoring files: rdma.peak for per-device peak
usage and rdma.events / rdma.events.local for resource-limit
exhaustion. The max-limit parser was rewritten, fixing two input
parsing bugs.
- cpuset: fix a sched-domain leak on the domain-rebuild failure path
and skip a redundant hardwall ancestor scan on v2.
- Misc: pair the remaining lockless cgroup.max.* reads with WRITE_ONCE,
assorted selftest robustness fixes, and doc path corrections.
* tag 'cgroup-for-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (22 commits)
cgroup: Migrate tasks to the root css when a controller is rebound
docs: cgroup: Fix stale source file paths
cgroup/cpuset: Free sched domains on rebuild guard failure
cgroup: pair max limit READ_ONCE() with WRITE_ONCE()
selftests/cgroup: enable memory controller in hugetlb memcg test
cgroup/rdma: Drop unnecessary READ_ONCE() on event counters
cgroup: Defer kill_css_finish() in cgroup_apply_control_disable()
cgroup: Add per-subsys-css kill_css_finish deferral
cgroup: Move populated counters to cgroup_subsys_state
cgroup: Annotate unlocked nr_populated_* accesses with READ_ONCE/WRITE_ONCE
cgroup: Inline cgroup_has_tasks() in cgroup.h
cgroup/rdma: document rdma.peak, rdma.events and rdma.events.local
cgroup/rdma: add rdma.events.local for per-cgroup allocation failure attribution
cgroup/rdma: add rdma.events to track resource limit exhaustion
cgroup/rdma: add rdma.peak for per-device peak usage tracking
selftests/cgroup: check malloc return value in alloc_anon functions
cgroup/cpuset: Skip hardwall ancestor scan in cpuset v2 in cpuset_current_node_allowed()
selftests/cgroup: fix misleading debug message in test_cgfreezer_time_child
selftests/cgroup: fix child process escaping to parent cleanup in test_cpucg_nice
selftests/cgroup: Add NULL check after malloc in cgroup_util.c
...
| -rw-r--r-- | Documentation/admin-guide/cgroup-v1/cgroups.rst | 2 | ||||
| -rw-r--r-- | Documentation/admin-guide/cgroup-v1/memcg_test.rst | 2 | ||||
| -rw-r--r-- | Documentation/admin-guide/cgroup-v2.rst | 53 | ||||
| -rw-r--r-- | include/linux/cgroup-defs.h | 30 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 27 | ||||
| -rw-r--r-- | include/linux/cgroup_rdma.h | 4 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup.c | 222 | ||||
| -rw-r--r-- | kernel/cgroup/cpuset-v1.c | 2 | ||||
| -rw-r--r-- | kernel/cgroup/cpuset.c | 10 | ||||
| -rw-r--r-- | kernel/cgroup/rdma.c | 315 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/lib/cgroup_util.c | 9 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/test_cpu.c | 2 | ||||
| -rwxr-xr-x | tools/testing/selftests/cgroup/test_cpuset_prs.sh | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/test_freezer.c | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/test_hugetlb_memcg.c | 8 | ||||
| -rw-r--r-- | tools/testing/selftests/cgroup/test_memcontrol.c | 53 |
16 files changed, 532 insertions, 211 deletions
diff --git a/Documentation/admin-guide/cgroup-v1/cgroups.rst b/Documentation/admin-guide/cgroup-v1/cgroups.rst index 463f98453323..e501f45ea93f 100644 --- a/Documentation/admin-guide/cgroup-v1/cgroups.rst +++ b/Documentation/admin-guide/cgroup-v1/cgroups.rst @@ -525,7 +525,7 @@ cgroup. It may also be taken to prevent cgroups from being modified, but more specific locks may be more appropriate in that situation. -See kernel/cgroup.c for more details. +See kernel/cgroup/cgroup.c for more details. Subsystems can take/release the cgroup_mutex via the functions cgroup_lock()/cgroup_unlock(). diff --git a/Documentation/admin-guide/cgroup-v1/memcg_test.rst b/Documentation/admin-guide/cgroup-v1/memcg_test.rst index 7c7cd457cf69..ebedbc3c3f9c 100644 --- a/Documentation/admin-guide/cgroup-v1/memcg_test.rst +++ b/Documentation/admin-guide/cgroup-v1/memcg_test.rst @@ -321,7 +321,7 @@ Under below explanation, we assume CONFIG_SWAP=y. ---------------------- Memory controller implements memory thresholds using cgroups notification - API. You can use tools/cgroup/cgroup_event_listener.c to test it. + API. You can use samples/cgroup/cgroup_event_listener.c to test it. (Shell-A) Create cgroup and run event listener:: diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 6efd0095ed99..993446ab66d0 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2785,6 +2785,59 @@ RDMA Interface Files mlx4_0 hca_handle=1 hca_object=20 ocrdma1 hca_handle=1 hca_object=23 + rdma.peak + A read-only nested-keyed file that exists for all the cgroups + except root. It shows the historical high watermark of + resource usage per device since the cgroup was created. + + An example for mlx4 and ocrdma device follows:: + + mlx4_0 hca_handle=1 hca_object=20 + ocrdma1 hca_handle=0 hca_object=23 + + rdma.events + A read-only nested-keyed file which exists on non-root + cgroups. The following nested keys are defined. + + max + The number of times a process in this cgroup or its + descendants attempted an RDMA resource allocation that + was rejected because a rdma.max limit in the subtree + was reached. This is a hierarchical counter: the event + is propagated upward to all ancestor cgroups. A value + change in this file generates a file modified event. + + alloc_fail + The number of RDMA resource allocation attempts that + originated in this cgroup or its descendants and failed + due to a rdma.max limit being reached. This is a + hierarchical counter propagated upward. + + An example for mlx4 device follows:: + + mlx4_0 hca_handle.max=5 hca_handle.alloc_fail=3 hca_object.max=0 hca_object.alloc_fail=0 + + rdma.events.local + Similar to rdma.events but the fields in the file are local + to the cgroup i.e. not hierarchical. The file modified event + generated on this file reflects only the local events. + + The following nested keys are defined. + + max + The number of times a process in this cgroup or its + descendants attempted an RDMA resource allocation that + was rejected because this cgroup's own rdma.max limit + was reached. + alloc_fail + The number of RDMA resource allocation attempts + originating from this cgroup that failed due to this + cgroup's or an ancestor's rdma.max limit. + + An example for mlx4 device follows:: + + mlx4_0 hca_handle.max=5 hca_handle.alloc_fail=0 hca_object.max=0 hca_object.alloc_fail=0 + DMEM ---- diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 50a784da7a81..de2cd6238c2a 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -254,6 +254,18 @@ struct cgroup_subsys_state { int nr_descendants; /* + * Hierarchical populated state. For cgroup->self, nr_populated_csets + * counts populated csets linked via cgrp_cset_link. + * nr_populated_children counts immediate-child csses whose own + * populated state is nonzero. Protected by css_set_lock. + */ + int nr_populated_csets; + int nr_populated_children; + + /* deferred kill_css_finish() queued by css_update_populated() */ + struct work_struct kill_finish_work; + + /* * A singly-linked list of css structures to be rstat flushed. * This is a scratch field to be used exclusively by * css_rstat_flush(). @@ -504,17 +516,12 @@ struct cgroup { int max_descendants; /* - * Each non-empty css_set associated with this cgroup contributes - * one to nr_populated_csets. The counter is zero iff this cgroup - * doesn't have any tasks. - * - * All children which have non-zero nr_populated_csets and/or - * nr_populated_children of their own contribute one to either - * nr_populated_domain_children or nr_populated_threaded_children - * depending on their type. Each counter is zero iff all cgroups - * of the type in the subtree proper don't have any tasks. + * Domain/threaded split of self.nr_populated_children: each counts + * immediate-child cgroups whose subtree is populated and sums to + * self.nr_populated_children. Kept as separate fields to allow readers + * like cgroup_can_be_thread_root() unlocked access. Protected by + * css_set_lock; updated by css_update_populated(). */ - int nr_populated_csets; int nr_populated_domain_children; int nr_populated_threaded_children; @@ -611,9 +618,6 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; - /* defers killing csses after removal until cgroup is depopulated */ - struct work_struct finish_destroy_work; - /* used to schedule release agent */ struct work_struct release_agent_work; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c5648fcf74e2..f2aa46a4f871 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -640,11 +640,32 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, return cgroup_is_descendant(cset->dfl_cgrp, ancestor); } -/* no synchronization, the result can only be used as a hint */ +/* + * Populated counters: writes happen under css_set_lock. The accessors below + * may read unlocked. What an unpopulated result means depends on context: + * + * - No lock held. Just a snapshot. May race with concurrent updates and is + * useful only as a hint. + * + * - cgroup_mutex held. Migration into the cgroup is blocked, so an observed + * !populated stays !populated until cgroup_mutex is dropped. + * + * - CSS_DYING set. The css can no longer be repopulated, so !populated is + * sticky once observed. + */ +static inline bool cgroup_has_tasks(struct cgroup *cgrp) +{ + return READ_ONCE(cgrp->self.nr_populated_csets); +} + +static inline bool css_is_populated(struct cgroup_subsys_state *css) +{ + return READ_ONCE(css->nr_populated_csets) || READ_ONCE(css->nr_populated_children); +} + static inline bool cgroup_is_populated(struct cgroup *cgrp) { - return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children + - cgrp->nr_populated_threaded_children; + return css_is_populated(&cgrp->self); } /* returns ino associated with a cgroup */ diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h index 80edae03c313..404e746552ca 100644 --- a/include/linux/cgroup_rdma.h +++ b/include/linux/cgroup_rdma.h @@ -24,6 +24,10 @@ struct rdma_cgroup { * that belongs to this cgroup. */ struct list_head rpools; + + /* Handles for rdma.events[.local] */ + struct cgroup_file events_file; + struct cgroup_file events_local_file; }; struct rdmacg_device { diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6152add0c5eb..38f8d9df8fbc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -197,6 +197,14 @@ static u32 cgrp_dfl_implicit_ss_mask; /* some controllers can be threaded on the default hierarchy */ static u32 cgrp_dfl_threaded_ss_mask; +/* + * Set across rebind_subsystems() to the controllers leaving a hierarchy. + * Guarded by cgroup_mutex. Makes find_existing_css_set() resolve them to the + * root css so the affected tasks are migrated there before + * cgroup_apply_control_disable() kills the per-cgroup csses. + */ +static u32 cgroup_rebind_ss_mask; + /* The list of hierarchy roots */ LIST_HEAD(cgroup_roots); static int cgroup_root_count; @@ -264,7 +272,6 @@ static void cgroup_finalize_control(struct cgroup *cgrp, int ret); static void css_task_iter_skip(struct css_task_iter *it, struct task_struct *task); static int cgroup_destroy_locked(struct cgroup *cgrp); -static void cgroup_finish_destroy(struct cgroup *cgrp); static void kill_css_sync(struct cgroup_subsys_state *css); static void kill_css_finish(struct cgroup_subsys_state *css); static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, @@ -376,11 +383,6 @@ static void cgroup_idr_remove(struct idr *idr, int id) spin_unlock_bh(&cgroup_idr_lock); } -static bool cgroup_has_tasks(struct cgroup *cgrp) -{ - return cgrp->nr_populated_csets; -} - static bool cgroup_is_threaded(struct cgroup *cgrp) { return cgrp->dom_cgrp != cgrp; @@ -409,7 +411,7 @@ static bool cgroup_can_be_thread_root(struct cgroup *cgrp) return false; /* can only have either domain or threaded children */ - if (cgrp->nr_populated_domain_children) + if (READ_ONCE(cgrp->nr_populated_domain_children)) return false; /* and no domain controllers can be enabled */ @@ -761,62 +763,76 @@ static bool css_set_populated(struct css_set *cset) } /** - * cgroup_update_populated - update the populated count of a cgroup - * @cgrp: the target cgroup - * @populated: inc or dec populated count - * - * One of the css_sets associated with @cgrp is either getting its first - * task or losing the last. Update @cgrp->nr_populated_* accordingly. The - * count is propagated towards root so that a given cgroup's - * nr_populated_children is zero iff none of its descendants contain any - * tasks. - * - * @cgrp's interface file "cgroup.populated" is zero if both - * @cgrp->nr_populated_csets and @cgrp->nr_populated_children are zero and - * 1 otherwise. When the sum changes from or to zero, userland is notified - * that the content of the interface file has changed. This can be used to - * detect when @cgrp and its descendants become populated or empty. + * css_update_populated - update the populated state of a css and ancestors + * @css: leaf css whose own populated count is changing + * @populated: inc or dec + * + * One of the css_sets pinned by @css is getting its first task or losing the + * last. Propagate the transition up the parent chain so that a css's + * nr_populated_children is zero iff none of its descendants contain any tasks. + * + * For a cgroup->self walk, also runs cgroup-side bookkeeping at each level: + * domain/threaded child split, deferred-destroy trigger, and notification via + * "cgroup.populated" (zero iff cgrp->self has neither populated csets nor + * populated children; userland is notified on transitions). */ -static void cgroup_update_populated(struct cgroup *cgrp, bool populated) +static void css_update_populated(struct cgroup_subsys_state *css, bool populated) { - struct cgroup *child = NULL; + struct cgroup_subsys_state *child = NULL; int adj = populated ? 1 : -1; lockdep_assert_held(&css_set_lock); do { - bool was_populated = cgroup_is_populated(cgrp); + /* non-NULL only on the cgroup->self walk */ + struct cgroup *cgrp = css_is_self(css) ? css->cgroup : NULL; + bool was_populated = css_is_populated(css); if (!child) { - cgrp->nr_populated_csets += adj; + WRITE_ONCE(css->nr_populated_csets, + css->nr_populated_csets + adj); } else { - if (cgroup_is_threaded(child)) - cgrp->nr_populated_threaded_children += adj; - else - cgrp->nr_populated_domain_children += adj; + WRITE_ONCE(css->nr_populated_children, + css->nr_populated_children + adj); + if (cgrp) { + if (cgroup_is_threaded(child->cgroup)) + WRITE_ONCE(cgrp->nr_populated_threaded_children, + cgrp->nr_populated_threaded_children + adj); + else + WRITE_ONCE(cgrp->nr_populated_domain_children, + cgrp->nr_populated_domain_children + adj); + } } - if (was_populated == cgroup_is_populated(cgrp)) + if (was_populated == css_is_populated(css)) break; /* - * Subtree just emptied below an offlined cgrp. Fire deferred - * destroy. The transition is one-shot. + * Pair with smp_mb() in kill_css_sync(). Either we observe + * CSS_DYING and queue, or the caller observes our decrement + * and fires synchronously. + */ + smp_mb(); + + /* + * Subtree just emptied below a dying css. Fire deferred kill. + * The transition is one-shot for a dying css. */ - if (was_populated && !css_is_online(&cgrp->self)) { - cgroup_get(cgrp); - WARN_ON_ONCE(!queue_work(cgroup_offline_wq, - &cgrp->finish_destroy_work)); + if (was_populated && css_is_dying(css)) { + css_get(css); + WARN_ON_ONCE(!queue_work(cgroup_offline_wq, &css->kill_finish_work)); } - cgroup1_check_for_release(cgrp); - TRACE_CGROUP_PATH(notify_populated, cgrp, - cgroup_is_populated(cgrp)); - cgroup_file_notify(&cgrp->events_file); + if (cgrp) { + cgroup1_check_for_release(cgrp); + TRACE_CGROUP_PATH(notify_populated, cgrp, + cgroup_is_populated(cgrp)); + cgroup_file_notify(&cgrp->events_file); + } - child = cgrp; - cgrp = cgroup_parent(cgrp); - } while (cgrp); + child = css; + css = css->parent; + } while (css); } /** @@ -824,17 +840,27 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) * @cset: target css_set * @populated: whether @cset is populated or depopulated * - * @cset is either getting the first task or losing the last. Update the - * populated counters of all associated cgroups accordingly. + * @cset is either getting the first task or losing the last. Update the + * populated counters along each linked cgroup's self chain and each + * subsystem css that @cset pins. */ static void css_set_update_populated(struct css_set *cset, bool populated) { struct cgrp_cset_link *link; + struct cgroup_subsys *ss; + int ssid; lockdep_assert_held(&css_set_lock); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) - cgroup_update_populated(link->cgrp, populated); + css_update_populated(&link->cgrp->self, populated); + + for_each_subsys(ss, ssid) { + struct cgroup_subsys_state *css = cset->subsys[ssid]; + + if (css) + css_update_populated(css, populated); + } } /* @@ -1065,7 +1091,15 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, * won't change, so no need for locking. */ for_each_subsys(ss, i) { - if (root->subsys_mask & (1UL << i)) { + if (unlikely(cgroup_rebind_ss_mask & (1UL << i))) { + /* + * @ss is leaving this hierarchy and its per-cgroup + * csses are about to be killed. Resolve to the + * surviving root css so the tasks are migrated there. + */ + template[i] = cgroup_css(&root->cgrp, ss); + WARN_ON_ONCE(!template[i]); + } else if (root->subsys_mask & (1UL << i)) { /* * @ss is in this hierarchy, so we want the * effective css from @cgrp. @@ -1835,11 +1869,17 @@ int rebind_subsystems(struct cgroup_root *dst_root, u32 ss_mask) struct cgroup *scgrp = &cgrp_dfl_root.cgrp; /* - * Controllers from default hierarchy that need to be rebound - * are all disabled together in one go. + * Controllers leaving the default hierarchy are disabled + * together. cgroup_rebind_ss_mask makes cgroup_apply_control() + * migrate their tasks to the root css, so the per-cgroup csses + * are unpopulated when cgroup_finalize_control() kills them. + * Clear it before cgroup_finalize_control(), which does no + * css_set lookup. */ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; + cgroup_rebind_ss_mask = dfl_disable_ss_mask; WARN_ON(cgroup_apply_control(scgrp)); + cgroup_rebind_ss_mask = 0; cgroup_finalize_control(scgrp, 0); } @@ -1853,9 +1893,14 @@ int rebind_subsystems(struct cgroup_root *dst_root, u32 ss_mask) WARN_ON(!css || cgroup_css(dcgrp, ss)); if (src_root != &cgrp_dfl_root) { - /* disable from the source */ + /* + * Disable from the source, migrating its tasks to the + * root css first (see cgroup_rebind_ss_mask). + */ src_root->subsys_mask &= ~(1 << ssid); + cgroup_rebind_ss_mask = 1 << ssid; WARN_ON(cgroup_apply_control(scgrp)); + cgroup_rebind_ss_mask = 0; cgroup_finalize_control(scgrp, 0); } @@ -2051,16 +2096,6 @@ static int cgroup_reconfigure(struct fs_context *fc) return 0; } -static void cgroup_finish_destroy_work_fn(struct work_struct *work) -{ - struct cgroup *cgrp = container_of(work, struct cgroup, finish_destroy_work); - - cgroup_lock(); - cgroup_finish_destroy(cgrp); - cgroup_unlock(); - cgroup_put(cgrp); -} - static void init_cgroup_housekeeping(struct cgroup *cgrp) { struct cgroup_subsys *ss; @@ -2087,7 +2122,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) #endif init_waitqueue_head(&cgrp->offline_waitq); - INIT_WORK(&cgrp->finish_destroy_work, cgroup_finish_destroy_work_fn); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } @@ -2192,7 +2226,7 @@ int cgroup_setup_root(struct cgroup_root *root, u32 ss_mask) hash_for_each(css_set_table, i, cset, hlist) { link_css_set(&tmp_links, cset, root_cgrp); if (css_set_populated(cset)) - cgroup_update_populated(root_cgrp, true); + css_update_populated(&root_cgrp->self, true); } spin_unlock_irq(&css_set_lock); @@ -3230,7 +3264,7 @@ restart: struct cgroup_subsys_state *css = cgroup_css(dsct, ss); DEFINE_WAIT(wait); - if (!css || !percpu_ref_is_dying(&css->refcnt)) + if (!css || !css_is_dying(css)) continue; cgroup_get_live(dsct); @@ -3398,7 +3432,8 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css_sync(css); - kill_css_finish(css); + if (!css_is_populated(css)) + kill_css_finish(css); } else if (!css_visible(css)) { css_clear_dir(css); if (ss->css_reset) @@ -3726,7 +3761,7 @@ static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of, if (!cgrp) return -ENOENT; - cgrp->max_descendants = descendants; + WRITE_ONCE(cgrp->max_descendants, descendants); cgroup_kn_unlock(of->kn); @@ -3769,7 +3804,7 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of, if (!cgrp) return -ENOENT; - cgrp->max_depth = depth; + WRITE_ONCE(cgrp->max_depth, depth); cgroup_kn_unlock(of->kn); @@ -5684,6 +5719,22 @@ static void css_release(struct percpu_ref *ref) queue_work(cgroup_release_wq, &css->destroy_work); } +/* + * Deferred kill_css_finish() fired from css_update_populated() once a dying + * css's hierarchical populated state drops to zero. Pinned by css_get() at the + * queue site; matched by css_put() here. + */ +static void kill_css_finish_work_fn(struct work_struct *work) +{ + struct cgroup_subsys_state *css = + container_of(work, struct cgroup_subsys_state, kill_finish_work); + + cgroup_lock(); + kill_css_finish(css); + cgroup_unlock(); + css_put(css); +} + static void init_and_link_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss, struct cgroup *cgrp) { @@ -5697,6 +5748,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css->id = -1; INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); + INIT_WORK(&css->kill_finish_work, kill_css_finish_work_fn); css->serial_nr = css_serial_nr_next++; atomic_set(&css->online_cnt, 0); @@ -6074,6 +6126,13 @@ static void kill_css_sync(struct cgroup_subsys_state *css) css->flags |= CSS_DYING; /* + * Pair with smp_mb() in css_update_populated(). Either our + * caller observes the walker's decrement and fires + * synchronously, or the walker observes CSS_DYING and queues. + */ + smp_mb(); + + /* * This must happen before css is disassociated with its cgroup. * See seq_css() for details. */ @@ -6148,9 +6207,9 @@ static void kill_css_finish(struct cgroup_subsys_state *css) * - This function: synchronous user-visible state teardown plus kill_css_sync() * on each subsystem css. * - * - cgroup_finish_destroy(): kicks the percpu_ref kill via kill_css_finish() on - * each subsystem css. Fires once @cgrp's subtree is fully drained, either - * inline here or from cgroup_update_populated(). + * - For each subsys css: fire kill_css_finish() synchronously if the subtree is + * already drained, otherwise rely on css_update_populated() to queue + * kill_finish_work when the last populated cset under the css empties. * * - The percpu_ref kill chain: css_killed_ref_fn -> css_killed_work_fn -> * ->css_offline() -> release/free. @@ -6228,29 +6287,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) /* put the base reference */ percpu_ref_kill(&cgrp->self.refcnt); - if (!cgroup_is_populated(cgrp)) - cgroup_finish_destroy(cgrp); + for_each_css(css, ssid, cgrp) { + if (!css_is_populated(css)) + kill_css_finish(css); + } return 0; }; -/** - * cgroup_finish_destroy - deferred half of @cgrp destruction - * @cgrp: cgroup whose subtree just became empty - * - * See cgroup_destroy_locked() for the rationale. - */ -static void cgroup_finish_destroy(struct cgroup *cgrp) -{ - struct cgroup_subsys_state *css; - int ssid; - - lockdep_assert_held(&cgroup_mutex); - - for_each_css(css, ssid, cgrp) - kill_css_finish(css); -} - int cgroup_rmdir(struct kernfs_node *kn) { struct cgroup *cgrp; diff --git a/kernel/cgroup/cpuset-v1.c b/kernel/cgroup/cpuset-v1.c index 7308e9b02495..3e9968dd91e9 100644 --- a/kernel/cgroup/cpuset-v1.c +++ b/kernel/cgroup/cpuset-v1.c @@ -312,7 +312,7 @@ void cpuset1_hotplug_update_tasks(struct cpuset *cs, * This is full cgroup operation which will also call back into * cpuset. Execute it asynchronously using workqueue. */ - if (is_empty && cs->css.cgroup->nr_populated_csets && + if (is_empty && cgroup_has_tasks(cs->css.cgroup) && css_tryget_online(&cs->css)) { struct cpuset_remove_tasks_struct *s; diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index c9e14fda3d6f..591e3aa487fc 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -432,7 +432,7 @@ static inline bool partition_is_populated(struct cpuset *cs, * nr_populated_domain_children may include populated * csets from descendants that are partitions. */ - if (cs->css.cgroup->nr_populated_csets || + if (cgroup_has_tasks(cs->css.cgroup) || cs->attach_in_progress) return true; @@ -1004,8 +1004,11 @@ void rebuild_sched_domains_locked(void) * prevent the panic. */ for (i = 0; doms && i < ndoms; i++) { - if (WARN_ON_ONCE(!cpumask_subset(doms[i], cpu_active_mask))) + if (WARN_ON_ONCE(!cpumask_subset(doms[i], cpu_active_mask))) { + free_sched_domains(doms, ndoms); + kfree(attr); return; + } } /* Have scheduler rebuild the domains */ @@ -4236,6 +4239,9 @@ bool cpuset_current_node_allowed(int node, gfp_t gfp_mask) if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ return false; + if (cpuset_v2()) + return true; + /* Not hardwall and node outside mems_allowed: scan up cpusets */ spin_lock_irqsave(&callback_lock, flags); diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index 4fdab4cf49e0..5e82a03b3270 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -9,6 +9,7 @@ */ #include <linux/bitops.h> +#include <linux/limits.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/cgroup.h> @@ -17,6 +18,22 @@ #define RDMACG_MAX_STR "max" +enum rdmacg_limit_tokens { + RDMACG_HCA_HANDLE_VAL, + RDMACG_HCA_HANDLE_MAX, + RDMACG_HCA_OBJECT_VAL, + RDMACG_HCA_OBJECT_MAX, + NR_RDMACG_LIMIT_TOKENS, +}; + +static const match_table_t rdmacg_limit_tokens = { + { RDMACG_HCA_HANDLE_VAL, "hca_handle=%d" }, + { RDMACG_HCA_HANDLE_MAX, "hca_handle=max" }, + { RDMACG_HCA_OBJECT_VAL, "hca_object=%d" }, + { RDMACG_HCA_OBJECT_MAX, "hca_object=max" }, + { NR_RDMACG_LIMIT_TOKENS, NULL }, +}; + /* * Protects list of resource pools maintained on per cgroup basis * and rdma device list. @@ -27,6 +44,7 @@ static LIST_HEAD(rdmacg_devices); enum rdmacg_file_type { RDMACG_RESOURCE_TYPE_MAX, RDMACG_RESOURCE_TYPE_STAT, + RDMACG_RESOURCE_TYPE_PEAK, }; /* @@ -43,6 +61,7 @@ static char const *rdmacg_resource_names[] = { struct rdmacg_resource { int max; int usage; + int peak; }; /* @@ -62,6 +81,12 @@ struct rdmacg_resource_pool { u64 usage_sum; /* total number counts which are set to max */ int num_max_cnt; + + /* per-resource event counters */ + u64 events_max[RDMACG_RESOURCE_MAX]; + u64 events_alloc_fail[RDMACG_RESOURCE_MAX]; + u64 events_local_max[RDMACG_RESOURCE_MAX]; + u64 events_local_alloc_fail[RDMACG_RESOURCE_MAX]; }; static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) @@ -109,6 +134,26 @@ static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) kfree(rpool); } +static bool rpool_has_persistent_state(struct rdmacg_resource_pool *rpool) +{ + int i; + + /* + * Keep the rpool alive if any peak value is non-zero, + * so that rdma.peak persists as a historical high- + * watermark even after all resources are freed. + */ + for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { + if (rpool->resources[i].peak || + rpool->events_max[i] || + rpool->events_local_max[i] || + rpool->events_alloc_fail[i] || + rpool->events_local_alloc_fail[i]) + return true; + } + return false; +} + static struct rdmacg_resource_pool * find_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) @@ -187,11 +232,67 @@ uncharge_cg_locked(struct rdma_cgroup *cg, rpool->usage_sum--; if (rpool->usage_sum == 0 && rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { - /* - * No user of the rpool and all entries are set to max, so - * safe to delete this rpool. - */ - free_cg_rpool_locked(rpool); + if (!rpool_has_persistent_state(rpool)) { + /* + * No user of the rpool and all entries are set to max, so + * safe to delete this rpool. + */ + free_cg_rpool_locked(rpool); + } + } +} + +/** + * rdmacg_event_locked - fire event when resource allocation exceeds limit + * @cg: requesting cgroup + * @over_cg: cgroup whose limit was exceeded + * @device: rdma device + * @index: resource type index + * + * Must be called under rdmacg_mutex. Updates event counters in the + * resource pools of @cg and @over_cg, propagates hierarchical max + * events from @over_cg (including itself) upward, and notifies + * userspace via cgroup_file_notify(). + */ +static void rdmacg_event_locked(struct rdma_cgroup *cg, + struct rdma_cgroup *over_cg, + struct rdmacg_device *device, + enum rdmacg_resource_type index) +{ + struct rdmacg_resource_pool *rpool; + struct rdma_cgroup *p; + + lockdep_assert_held(&rdmacg_mutex); + + /* Increment local alloc_fail in requesting cgroup */ + rpool = find_cg_rpool_locked(cg, device); + if (rpool) { + rpool->events_local_alloc_fail[index]++; + cgroup_file_notify(&cg->events_local_file); + } + + /* Increment local max in the over-limit cgroup */ + rpool = find_cg_rpool_locked(over_cg, device); + if (rpool) { + rpool->events_local_max[index]++; + cgroup_file_notify(&over_cg->events_local_file); + } + + /* Propagate hierarchical max events upward */ + for (p = over_cg; parent_rdmacg(p); p = parent_rdmacg(p)) { + rpool = get_cg_rpool_locked(p, device); + if (!IS_ERR(rpool)) { + rpool->events_max[index]++; + cgroup_file_notify(&p->events_file); + } + } + /* Propagate hierarchical alloc_fail from requesting cgroup upward */ + for (p = cg; parent_rdmacg(p); p = parent_rdmacg(p)) { + rpool = get_cg_rpool_locked(p, device); + if (!IS_ERR(rpool)) { + rpool->events_alloc_fail[index]++; + cgroup_file_notify(&p->events_file); + } } } @@ -293,12 +394,20 @@ int rdmacg_try_charge(struct rdma_cgroup **rdmacg, } } } + /* Update peak only after all charges succeed */ + for (p = cg; p; p = parent_rdmacg(p)) { + rpool = find_cg_rpool_locked(p, device); + if (rpool && rpool->resources[index].usage > rpool->resources[index].peak) + rpool->resources[index].peak = rpool->resources[index].usage; + } mutex_unlock(&rdmacg_mutex); *rdmacg = cg; return 0; err: + if (ret == -EAGAIN) + rdmacg_event_locked(cg, p, device, index); mutex_unlock(&rdmacg_mutex); rdmacg_uncharge_hierarchy(cg, device, p, index); return ret; @@ -355,62 +464,6 @@ void rdmacg_unregister_device(struct rdmacg_device *device) } EXPORT_SYMBOL(rdmacg_unregister_device); -static int parse_resource(char *c, int *intval) -{ - substring_t argstr; - char *name, *value = c; - size_t len; - int ret, i; - - name = strsep(&value, "="); - if (!name || !value) - return -EINVAL; - - i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name); - if (i < 0) - return i; - - len = strlen(value); - - argstr.from = value; - argstr.to = value + len; - - ret = match_int(&argstr, intval); - if (ret >= 0) { - if (*intval < 0) - return -EINVAL; - return i; - } - if (strncmp(value, RDMACG_MAX_STR, len) == 0) { - *intval = S32_MAX; - return i; - } - return -EINVAL; -} - -static int rdmacg_parse_limits(char *options, - int *new_limits, unsigned long *enables) -{ - char *c; - int err = -EINVAL; - - /* parse resource options */ - while ((c = strsep(&options, " ")) != NULL) { - int index, intval; - - index = parse_resource(c, &intval); - if (index < 0) - goto err; - - new_limits[index] = intval; - *enables |= BIT(index); - } - return 0; - -err: - return err; -} - static struct rdmacg_device *rdmacg_get_device_locked(const char *name) { struct rdmacg_device *device; @@ -432,6 +485,7 @@ static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, struct rdmacg_resource_pool *rpool; struct rdmacg_device *device; char *options = strstrip(buf); + char *p; int *new_limits; unsigned long enables = 0; int i = 0, ret = 0; @@ -449,9 +503,45 @@ static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, goto err; } - ret = rdmacg_parse_limits(options, new_limits, &enables); - if (ret) - goto parse_err; + /* parse resource limit tokens */ + while ((p = strsep(&options, " \t\n"))) { + substring_t args[MAX_OPT_ARGS]; + int tok, intval; + + if (!*p) + continue; + + tok = match_token(p, rdmacg_limit_tokens, args); + switch (tok) { + case RDMACG_HCA_HANDLE_VAL: + if (match_int(&args[0], &intval) || intval < 0) { + ret = -EINVAL; + goto parse_err; + } + new_limits[RDMACG_RESOURCE_HCA_HANDLE] = intval; + enables |= BIT(RDMACG_RESOURCE_HCA_HANDLE); + break; + case RDMACG_HCA_HANDLE_MAX: + new_limits[RDMACG_RESOURCE_HCA_HANDLE] = S32_MAX; + enables |= BIT(RDMACG_RESOURCE_HCA_HANDLE); + break; + case RDMACG_HCA_OBJECT_VAL: + if (match_int(&args[0], &intval) || intval < 0) { + ret = -EINVAL; + goto parse_err; + } + new_limits[RDMACG_RESOURCE_HCA_OBJECT] = intval; + enables |= BIT(RDMACG_RESOURCE_HCA_OBJECT); + break; + case RDMACG_HCA_OBJECT_MAX: + new_limits[RDMACG_RESOURCE_HCA_OBJECT] = S32_MAX; + enables |= BIT(RDMACG_RESOURCE_HCA_OBJECT); + break; + default: + ret = -EINVAL; + goto parse_err; + } + } /* acquire lock to synchronize with hot plug devices */ mutex_lock(&rdmacg_mutex); @@ -474,11 +564,13 @@ static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, if (rpool->usage_sum == 0 && rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { - /* - * No user of the rpool and all entries are set to max, so - * safe to delete this rpool. - */ - free_cg_rpool_locked(rpool); + if (!rpool_has_persistent_state(rpool)) { + /* + * No user of the rpool and all entries are set to max, so + * safe to delete this rpool. + */ + free_cg_rpool_locked(rpool); + } } dev_err: @@ -508,6 +600,8 @@ static void print_rpool_values(struct seq_file *sf, value = rpool->resources[i].max; else value = S32_MAX; + } else if (sf_type == RDMACG_RESOURCE_TYPE_PEAK) { + value = rpool ? rpool->resources[i].peak : 0; } else { if (rpool) value = rpool->resources[i].usage; @@ -544,6 +638,64 @@ static int rdmacg_resource_read(struct seq_file *sf, void *v) return 0; } +static int rdmacg_events_show(struct seq_file *sf, void *v) +{ + struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); + struct rdmacg_resource_pool *rpool; + struct rdmacg_device *device; + int i; + + mutex_lock(&rdmacg_mutex); + + list_for_each_entry(device, &rdmacg_devices, dev_node) { + rpool = find_cg_rpool_locked(cg, device); + + seq_printf(sf, "%s ", device->name); + for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { + seq_printf(sf, "%s.max=%llu %s.alloc_fail=%llu", + rdmacg_resource_names[i], + rpool ? rpool->events_max[i] : 0ULL, + rdmacg_resource_names[i], + rpool ? rpool->events_alloc_fail[i] : 0ULL); + if (i < RDMACG_RESOURCE_MAX - 1) + seq_putc(sf, ' '); + } + seq_putc(sf, '\n'); + } + + mutex_unlock(&rdmacg_mutex); + return 0; +} + +static int rdmacg_events_local_show(struct seq_file *sf, void *v) +{ + struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); + struct rdmacg_resource_pool *rpool; + struct rdmacg_device *device; + int i; + + mutex_lock(&rdmacg_mutex); + + list_for_each_entry(device, &rdmacg_devices, dev_node) { + rpool = find_cg_rpool_locked(cg, device); + + seq_printf(sf, "%s ", device->name); + for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { + seq_printf(sf, "%s.max=%llu %s.alloc_fail=%llu", + rdmacg_resource_names[i], + rpool ? rpool->events_local_max[i] : 0ULL, + rdmacg_resource_names[i], + rpool ? rpool->events_local_alloc_fail[i] : 0ULL); + if (i < RDMACG_RESOURCE_MAX - 1) + seq_putc(sf, ' '); + } + seq_putc(sf, '\n'); + } + + mutex_unlock(&rdmacg_mutex); + return 0; +} + static struct cftype rdmacg_files[] = { { .name = "max", @@ -558,6 +710,24 @@ static struct cftype rdmacg_files[] = { .private = RDMACG_RESOURCE_TYPE_STAT, .flags = CFTYPE_NOT_ON_ROOT, }, + { + .name = "peak", + .seq_show = rdmacg_resource_read, + .private = RDMACG_RESOURCE_TYPE_PEAK, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "events", + .seq_show = rdmacg_events_show, + .file_offset = offsetof(struct rdma_cgroup, events_file), + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "events.local", + .seq_show = rdmacg_events_local_show, + .file_offset = offsetof(struct rdma_cgroup, events_local_file), + .flags = CFTYPE_NOT_ON_ROOT, + }, { } /* terminate */ }; @@ -577,6 +747,13 @@ rdmacg_css_alloc(struct cgroup_subsys_state *parent) static void rdmacg_css_free(struct cgroup_subsys_state *css) { struct rdma_cgroup *cg = css_rdmacg(css); + struct rdmacg_resource_pool *rpool, *tmp; + + /* Clean up rpools kept alive by non-zero peak values */ + mutex_lock(&rdmacg_mutex); + list_for_each_entry_safe(rpool, tmp, &cg->rpools, cg_node) + free_cg_rpool_locked(rpool); + mutex_unlock(&rdmacg_mutex); kfree(cg); } diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 42f54936f4bb..a7b3380d88d7 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -59,7 +59,8 @@ char *cg_name(const char *root, const char *name) size_t len = strlen(root) + strlen(name) + 2; char *ret = malloc(len); - snprintf(ret, len, "%s/%s", root, name); + if (ret) + snprintf(ret, len, "%s/%s", root, name); return ret; } @@ -69,7 +70,8 @@ char *cg_name_indexed(const char *root, const char *name, int index) size_t len = strlen(root) + strlen(name) + 10; char *ret = malloc(len); - snprintf(ret, len, "%s/%s_%d", root, name, index); + if (ret) + snprintf(ret, len, "%s/%s_%d", root, name, index); return ret; } @@ -79,7 +81,8 @@ char *cg_control(const char *cgroup, const char *control) size_t len = strlen(cgroup) + strlen(control) + 2; char *ret = malloc(len); - snprintf(ret, len, "%s/%s", cgroup, control); + if (ret) + snprintf(ret, len, "%s/%s", cgroup, control); return ret; } diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index c83f05438d7c..7a40d76b9548 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -278,7 +278,7 @@ static int test_cpucg_nice(const char *root) char buf[64]; snprintf(buf, sizeof(buf), "%d", getpid()); if (cg_write(cpucg, "cgroup.procs", buf)) - goto cleanup; + exit(EXIT_FAILURE); /* Try to keep niced CPU usage as constrained to hog_cpu as possible */ nice(1); diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 683b05062810..0d41aa0d343d 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -627,7 +627,7 @@ set_ctrl_state_noerr() online_cpus() { - [[ -n "OFFLINE_CPUS" ]] && { + [[ -n "$OFFLINE_CPUS" ]] && { for C in $OFFLINE_CPUS do write_cpu_online ${C}=1 diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 97fae92c8387..ead68542d45e 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -1353,7 +1353,7 @@ static int test_cgfreezer_time_child(const char *root) } if (ctime <= ptime) { - debug("Expect ctime (%ld) <= ptime (%ld)\n", ctime, ptime); + debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime); goto cleanup; } diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c index f451aa449be6..b627d84358b1 100644 --- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c +++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c @@ -217,6 +217,14 @@ int main(int argc, char **argv) if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) { + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + } + switch (test_hugetlb_memcg(root)) { case KSFT_PASS: ksft_test_result_pass("test_hugetlb_memcg\n"); diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index b43da9bc20c4..21aedb35cc12 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -55,15 +55,31 @@ cleanup: return -1; } -int alloc_anon(const char *cgroup, void *arg) +static char *alloc_and_populate_anon(size_t size) { - size_t size = (unsigned long)arg; char *buf, *ptr; buf = malloc(size); + if (buf == NULL) { + fprintf(stderr, "malloc() failed\n"); + return NULL; + } + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) *ptr = 0; + return buf; +} + +int alloc_anon(const char *cgroup, void *arg) +{ + size_t size = (unsigned long)arg; + char *buf; + + buf = alloc_and_populate_anon(size); + if (!buf) + return -1; + free(buf); return 0; } @@ -174,18 +190,13 @@ cleanup_free: static int alloc_anon_50M_check(const char *cgroup, void *arg) { size_t size = MB(50); - char *buf, *ptr; + char *buf; long anon, current; int ret = -1; - buf = malloc(size); - if (buf == NULL) { - fprintf(stderr, "malloc() failed\n"); + buf = alloc_and_populate_anon(size); + if (!buf) return -1; - } - - for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) - *ptr = 0; current = cg_read_long(cgroup, "memory.current"); if (current < size) @@ -406,16 +417,11 @@ static int alloc_anon_noexit(const char *cgroup, void *arg) { int ppid = getppid(); size_t size = (unsigned long)arg; - char *buf, *ptr; + char *buf; - buf = malloc(size); - if (buf == NULL) { - fprintf(stderr, "malloc() failed\n"); + buf = alloc_and_populate_anon(size); + if (!buf) return -1; - } - - for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) - *ptr = 0; while (getppid() == ppid) sleep(1); @@ -990,18 +996,13 @@ static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) { long mem_max = (long)arg; size_t size = MB(50); - char *buf, *ptr; + char *buf; long mem_current, swap_current; int ret = -1; - buf = malloc(size); - if (buf == NULL) { - fprintf(stderr, "malloc() failed\n"); + buf = alloc_and_populate_anon(size); + if (!buf) return -1; - } - - for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) - *ptr = 0; mem_current = cg_read_long(cgroup, "memory.current"); if (!mem_current || !values_close(mem_current, mem_max, 3)) |
