summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cgroup-defs.h 16
-rw-r--r-- kernel/cgroup/cgroup-internal.h 10
-rw-r--r-- kernel/cgroup/cgroup.c 14
-rw-r--r-- kernel/cgroup/rstat.c 180
4 files changed, 112 insertions, 108 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 133531fcfb33..04cb42419310 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -259,11 +259,11 @@ struct css_set {
};
/*
- * cgroup basic resource usage statistics. Accounting is done per-cpu in
- * cgroup_cpu_stat which is then lazily propagated up the hierarchy on
- * reads.
+ * rstat - cgroup scalable recursive statistics. Accounting is done
+ * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
+ * hierarchy on reads.
*
- * When a stat gets updated, the cgroup_cpu_stat and its ancestors are
+ * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
* linked into the updated tree. On the following read, propagation only
* considers and consumes the updated tree. This makes reading O(the
* number of descendants which have been active since last read) instead of
@@ -274,7 +274,7 @@ struct css_set {
* become very expensive. By propagating selectively, increasing reading
* frequency decreases the cost of each read.
*/
-struct cgroup_cpu_stat {
+struct cgroup_rstat_cpu {
/*
* ->sync protects all the current counters. These are the only
* fields which get updated in the hot path.
@@ -297,7 +297,7 @@ struct cgroup_cpu_stat {
* to the cgroup makes it unnecessary for each per-cpu struct to
* point back to the associated cgroup.
*
- * Protected by per-cpu cgroup_cpu_stat_lock.
+ * Protected by per-cpu cgroup_rstat_cpu_lock.
*/
struct cgroup *updated_children; /* terminated by self cgroup */
struct cgroup *updated_next; /* NULL iff not on the list */
@@ -408,8 +408,10 @@ struct cgroup {
*/
struct cgroup *dom_cgrp;
+ /* per-cpu recursive resource statistics */
+ struct cgroup_rstat_cpu __percpu *rstat_cpu;
+
/* cgroup basic resource statistics */
- struct cgroup_cpu_stat __percpu *cpu_stat;
struct cgroup_stat pending_stat; /* pending from children */
struct cgroup_stat stat;
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index b928b27050c6..092711114a1f 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -201,13 +201,13 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
int cgroup_task_count(const struct cgroup *cgrp);
/*
- * stat.c
+ * rstat.c
*/
-void cgroup_stat_flush(struct cgroup *cgrp);
-int cgroup_stat_init(struct cgroup *cgrp);
-void cgroup_stat_exit(struct cgroup *cgrp);
+void cgroup_rstat_flush(struct cgroup *cgrp);
+int cgroup_rstat_init(struct cgroup *cgrp);
+void cgroup_rstat_exit(struct cgroup *cgrp);
void cgroup_stat_show_cputime(struct seq_file *seq);
-void cgroup_stat_boot(void);
+void cgroup_rstat_boot(void);
/*
* namespace.c
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index fdb7a582f8fc..32eb7ce0ad71 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -144,14 +144,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
};
#undef SUBSYS
-static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
+static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
/*
* The default hierarchy, reserved for the subsystems that are otherwise
* unattached - it never has more than a single cgroup, and all tasks are
* part of that cgroup.
*/
-struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
+struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
EXPORT_SYMBOL_GPL(cgrp_dfl_root);
/*
@@ -4592,7 +4592,7 @@ static void css_free_rwork_fn(struct work_struct *work)
cgroup_put(cgroup_parent(cgrp));
kernfs_put(cgrp->kn);
if (cgroup_on_dfl(cgrp))
- cgroup_stat_exit(cgrp);
+ cgroup_rstat_exit(cgrp);
kfree(cgrp);
} else {
/*
@@ -4629,7 +4629,7 @@ static void css_release_work_fn(struct work_struct *work)
trace_cgroup_release(cgrp);
if (cgroup_on_dfl(cgrp))
- cgroup_stat_flush(cgrp);
+ cgroup_rstat_flush(cgrp);
for (tcgrp = cgroup_parent(cgrp); tcgrp;
tcgrp = cgroup_parent(tcgrp))
@@ -4817,7 +4817,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
goto out_free_cgrp;
if (cgroup_on_dfl(parent)) {
- ret = cgroup_stat_init(cgrp);
+ ret = cgroup_rstat_init(cgrp);
if (ret)
goto out_cancel_ref;
}
@@ -4882,7 +4882,7 @@ out_idr_free:
cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
out_stat_exit:
if (cgroup_on_dfl(parent))
- cgroup_stat_exit(cgrp);
+ cgroup_rstat_exit(cgrp);
out_cancel_ref:
percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp:
@@ -5275,7 +5275,7 @@ int __init cgroup_init(void)
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
- cgroup_stat_boot();
+ cgroup_rstat_boot();
/*
* The latency of the synchronize_sched() is too high for cgroups,
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 1e111dd455c4..6824047b57a9 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -2,26 +2,26 @@
#include <linux/sched/cputime.h>
-static DEFINE_MUTEX(cgroup_stat_mutex);
-static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);
+static DEFINE_MUTEX(cgroup_rstat_mutex);
+static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
-static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
+static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
{
- return per_cpu_ptr(cgrp->cpu_stat, cpu);
+ return per_cpu_ptr(cgrp->rstat_cpu, cpu);
}
/**
- * cgroup_cpu_stat_updated - keep track of updated cpu_stat
+ * cgroup_rstat_cpu_updated - keep track of updated rstat_cpu
* @cgrp: target cgroup
- * @cpu: cpu on which cpu_stat was updated
+ * @cpu: cpu on which rstat_cpu was updated
*
- * @cgrp's cpu_stat on @cpu was updated. Put it on the parent's matching
- * cpu_stat->updated_children list. See the comment on top of
- * cgroup_cpu_stat definition for details.
+ * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
+ * rstat_cpu->updated_children list. See the comment on top of
+ * cgroup_rstat_cpu definition for details.
*/
-static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
+static void cgroup_rstat_cpu_updated(struct cgroup *cgrp, int cpu)
{
- raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
+ raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
struct cgroup *parent;
unsigned long flags;
@@ -33,7 +33,7 @@ static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
* instead of NULL, we can tell whether @cgrp is on the list by
* testing the next pointer for NULL.
*/
- if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
+ if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
return;
raw_spin_lock_irqsave(cpu_lock, flags);
@@ -41,42 +41,42 @@ static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
/* put @cgrp and all ancestors on the corresponding updated lists */
for (parent = cgroup_parent(cgrp); parent;
cgrp = parent, parent = cgroup_parent(cgrp)) {
- struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
- struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
+ struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+ struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
/*
* Both additions and removals are bottom-up. If a cgroup
* is already in the tree, all ancestors are.
*/
- if (cstat->updated_next)
+ if (rstatc->updated_next)
break;
- cstat->updated_next = pcstat->updated_children;
- pcstat->updated_children = cgrp;
+ rstatc->updated_next = prstatc->updated_children;
+ prstatc->updated_children = cgrp;
}
raw_spin_unlock_irqrestore(cpu_lock, flags);
}
/**
- * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
+ * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
* @pos: current position
* @root: root of the tree to traversal
* @cpu: target cpu
*
- * Walks the udpated cpu_stat tree on @cpu from @root. %NULL @pos starts
+ * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts
* the traversal and %NULL return indicates the end. During traversal,
* each returned cgroup is unlinked from the tree. Must be called with the
- * matching cgroup_cpu_stat_lock held.
+ * matching cgroup_rstat_cpu_lock held.
*
* The only ordering guarantee is that, for a parent and a child pair
* covered by a given traversal, if a child is visited, its parent is
* guaranteed to be visited afterwards.
*/
-static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
- struct cgroup *root, int cpu)
+static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
+ struct cgroup *root, int cpu)
{
- struct cgroup_cpu_stat *cstat;
+ struct cgroup_rstat_cpu *rstatc;
struct cgroup *parent;
if (pos == root)
@@ -93,10 +93,10 @@ static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
/* walk down to the first leaf */
while (true) {
- cstat = cgroup_cpu_stat(pos, cpu);
- if (cstat->updated_children == pos)
+ rstatc = cgroup_rstat_cpu(pos, cpu);
+ if (rstatc->updated_children == pos)
break;
- pos = cstat->updated_children;
+ pos = rstatc->updated_children;
}
/*
@@ -106,23 +106,23 @@ static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
* child in most cases. The only exception is @root.
*/
parent = cgroup_parent(pos);
- if (parent && cstat->updated_next) {
- struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
- struct cgroup_cpu_stat *ncstat;
+ if (parent && rstatc->updated_next) {
+ struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
+ struct cgroup_rstat_cpu *nrstatc;
struct cgroup **nextp;
- nextp = &pcstat->updated_children;
+ nextp = &prstatc->updated_children;
while (true) {
- ncstat = cgroup_cpu_stat(*nextp, cpu);
+ nrstatc = cgroup_rstat_cpu(*nextp, cpu);
if (*nextp == pos)
break;
WARN_ON_ONCE(*nextp == parent);
- nextp = &ncstat->updated_next;
+ nextp = &nrstatc->updated_next;
}
- *nextp = cstat->updated_next;
- cstat->updated_next = NULL;
+ *nextp = rstatc->updated_next;
+ rstatc->updated_next = NULL;
}
return pos;
@@ -139,19 +139,19 @@ static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
{
struct cgroup *parent = cgroup_parent(cgrp);
- struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
- struct task_cputime *last_cputime = &cstat->last_cputime;
+ struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+ struct task_cputime *last_cputime = &rstatc->last_cputime;
struct task_cputime cputime;
struct cgroup_stat delta;
unsigned seq;
- lockdep_assert_held(&cgroup_stat_mutex);
+ lockdep_assert_held(&cgroup_rstat_mutex);
/* fetch the current per-cpu values */
do {
- seq = __u64_stats_fetch_begin(&cstat->sync);
- cputime = cstat->cputime;
- } while (__u64_stats_fetch_retry(&cstat->sync, seq));
+ seq = __u64_stats_fetch_begin(&rstatc->sync);
+ cputime = rstatc->cputime;
+ } while (__u64_stats_fetch_retry(&rstatc->sync, seq));
/* accumulate the deltas to propgate */
delta.cputime.utime = cputime.utime - last_cputime->utime;
@@ -170,26 +170,27 @@ static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
cgroup_stat_accumulate(&parent->pending_stat, &delta);
}
-/* see cgroup_stat_flush() */
-static void cgroup_stat_flush_locked(struct cgroup *cgrp)
+/* see cgroup_rstat_flush() */
+static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
{
int cpu;
- lockdep_assert_held(&cgroup_stat_mutex);
+ lockdep_assert_held(&cgroup_rstat_mutex);
for_each_possible_cpu(cpu) {
- raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
+ raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
+ cpu);
struct cgroup *pos = NULL;
raw_spin_lock_irq(cpu_lock);
- while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
+ while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu)))
cgroup_cpu_stat_flush_one(pos, cpu);
raw_spin_unlock_irq(cpu_lock);
}
}
/**
- * cgroup_stat_flush - flush stats in @cgrp's subtree
+ * cgroup_rstat_flush - flush stats in @cgrp's subtree
* @cgrp: target cgroup
*
* Collect all per-cpu stats in @cgrp's subtree into the global counters
@@ -199,61 +200,62 @@ static void cgroup_stat_flush_locked(struct cgroup *cgrp)
* This also gets all cgroups in the subtree including @cgrp off the
* ->updated_children lists.
*/
-void cgroup_stat_flush(struct cgroup *cgrp)
+void cgroup_rstat_flush(struct cgroup *cgrp)
{
- mutex_lock(&cgroup_stat_mutex);
- cgroup_stat_flush_locked(cgrp);
- mutex_unlock(&cgroup_stat_mutex);
+ mutex_lock(&cgroup_rstat_mutex);
+ cgroup_rstat_flush_locked(cgrp);
+ mutex_unlock(&cgroup_rstat_mutex);
}
-static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
+static struct cgroup_rstat_cpu *
+cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
{
- struct cgroup_cpu_stat *cstat;
+ struct cgroup_rstat_cpu *rstatc;
- cstat = get_cpu_ptr(cgrp->cpu_stat);
- u64_stats_update_begin(&cstat->sync);
- return cstat;
+ rstatc = get_cpu_ptr(cgrp->rstat_cpu);
+ u64_stats_update_begin(&rstatc->sync);
+ return rstatc;
}
static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
- struct cgroup_cpu_stat *cstat)
+ struct cgroup_rstat_cpu *rstatc)
{
- u64_stats_update_end(&cstat->sync);
- cgroup_cpu_stat_updated(cgrp, smp_processor_id());
- put_cpu_ptr(cstat);
+ u64_stats_update_end(&rstatc->sync);
+ cgroup_rstat_cpu_updated(cgrp, smp_processor_id());
+ put_cpu_ptr(rstatc);
}
void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
- struct cgroup_cpu_stat *cstat;
+ struct cgroup_rstat_cpu *rstatc;
- cstat = cgroup_cpu_stat_account_begin(cgrp);
- cstat->cputime.sum_exec_runtime += delta_exec;
- cgroup_cpu_stat_account_end(cgrp, cstat);
+ rstatc = cgroup_cpu_stat_account_begin(cgrp);
+ rstatc->cputime.sum_exec_runtime += delta_exec;
+ cgroup_cpu_stat_account_end(cgrp, rstatc);
}
void __cgroup_account_cputime_field(struct cgroup *cgrp,
enum cpu_usage_stat index, u64 delta_exec)
{
- struct cgroup_cpu_stat *cstat;
+ struct cgroup_rstat_cpu *rstatc;
- cstat = cgroup_cpu_stat_account_begin(cgrp);
+ rstatc = cgroup_cpu_stat_account_begin(cgrp);
switch (index) {
case CPUTIME_USER:
case CPUTIME_NICE:
- cstat->cputime.utime += delta_exec;
+ rstatc->cputime.utime += delta_exec;
break;
case CPUTIME_SYSTEM:
case CPUTIME_IRQ:
case CPUTIME_SOFTIRQ:
- cstat->cputime.stime += delta_exec;
+ rstatc->cputime.stime += delta_exec;
break;
default:
break;
}
- cgroup_cpu_stat_account_end(cgrp, cstat);
+ cgroup_cpu_stat_account_end(cgrp, rstatc);
}
void cgroup_stat_show_cputime(struct seq_file *seq)
@@ -264,15 +266,15 @@ void cgroup_stat_show_cputime(struct seq_file *seq)
if (!cgroup_parent(cgrp))
return;
- mutex_lock(&cgroup_stat_mutex);
+ mutex_lock(&cgroup_rstat_mutex);
- cgroup_stat_flush_locked(cgrp);
+ cgroup_rstat_flush_locked(cgrp);
usage = cgrp->stat.cputime.sum_exec_runtime;
cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
&utime, &stime);
- mutex_unlock(&cgroup_stat_mutex);
+ mutex_unlock(&cgroup_rstat_mutex);
do_div(usage, NSEC_PER_USEC);
do_div(utime, NSEC_PER_USEC);
@@ -284,23 +286,23 @@ void cgroup_stat_show_cputime(struct seq_file *seq)
usage, utime, stime);
}
-int cgroup_stat_init(struct cgroup *cgrp)
+int cgroup_rstat_init(struct cgroup *cgrp)
{
int cpu;
- /* the root cgrp has cpu_stat preallocated */
- if (!cgrp->cpu_stat) {
- cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
- if (!cgrp->cpu_stat)
+ /* the root cgrp has rstat_cpu preallocated */
+ if (!cgrp->rstat_cpu) {
+ cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+ if (!cgrp->rstat_cpu)
return -ENOMEM;
}
/* ->updated_children list is self terminated */
for_each_possible_cpu(cpu) {
- struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
+ struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
- cstat->updated_children = cgrp;
- u64_stats_init(&cstat->sync);
+ rstatc->updated_children = cgrp;
+ u64_stats_init(&rstatc->sync);
}
prev_cputime_init(&cgrp->stat.prev_cputime);
@@ -308,31 +310,31 @@ int cgroup_stat_init(struct cgroup *cgrp)
return 0;
}
-void cgroup_stat_exit(struct cgroup *cgrp)
+void cgroup_rstat_exit(struct cgroup *cgrp)
{
int cpu;
- cgroup_stat_flush(cgrp);
+ cgroup_rstat_flush(cgrp);
/* sanity check */
for_each_possible_cpu(cpu) {
- struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
+ struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
- if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
- WARN_ON_ONCE(cstat->updated_next))
+ if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
+ WARN_ON_ONCE(rstatc->updated_next))
return;
}
- free_percpu(cgrp->cpu_stat);
- cgrp->cpu_stat = NULL;
+ free_percpu(cgrp->rstat_cpu);
+ cgrp->rstat_cpu = NULL;
}
-void __init cgroup_stat_boot(void)
+void __init cgroup_rstat_boot(void)
{
int cpu;
for_each_possible_cpu(cpu)
- raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));
+ raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
- BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
+ BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp));
}