summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>  2017-11-16 01:29:44 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org>  2017-11-16 01:29:44 +0300
commit: 22714a2ba4b55737cd7d5299db7aaf1fa8287354 (patch)
tree: 32b25f2e3e40732156a8a8d0dcb2ddf38410776f /include/linux
parent: 766ec76a27aa9dfdfee3a80f29ddc1f7539c71f9 (diff)
parent: 5f2e673405b742be64e7c3604ed4ed3ac14f35ce (diff)
download: linux-22714a2ba4b55737cd7d5299db7aaf1fa8287354.tar.xz
Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
 "Cgroup2 cpu controller support is finally merged.

   - Basic cpu statistics support to allow monitoring by default without
     the CPU controller enabled.

   - cgroup2 cpu controller support.

   - /sys/kernel/cgroup files to help dealing with new / optional
     features"

* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: export list of cgroups v2 features using sysfs
  cgroup: export list of delegatable control files using sysfs
  cgroup: mark @cgrp __maybe_unused in cpu_stat_show()
  MAINTAINERS: relocate cpuset.c
  cgroup, sched: Move basic cpu stats from cgroup.stat to cpu.stat
  sched: Implement interface for cgroup unified hierarchy
  sched: Misc preps for cgroup unified hierarchy interface
  sched/cputime: Add dummy cputime_adjust() implementation for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  cgroup: statically initialize init_css_set->dfl_cgrp
  cgroup: Implement cgroup2 basic CPU usage accounting
  cpuacct: Introduce cgroup_account_cputime[_field]()
  sched/cputime: Expose cputime_adjust()
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/cgroup-defs.h     59
-rw-r--r--  include/linux/cgroup.h          58
-rw-r--r--  include/linux/sched/cputime.h    3
3 files changed, 119 insertions, 1 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1dff0a478b45..8b7fd8eeccee 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -17,6 +17,7 @@
#include <linux/refcount.h>
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
+#include <linux/u64_stats_sync.h>
#include <linux/workqueue.h>
#include <linux/bpf-cgroup.h>
@@ -255,6 +256,57 @@ struct css_set {
struct rcu_head rcu_head;
};
+/*
+ * cgroup basic resource usage statistics. Accounting is done per-cpu in
+ * cgroup_cpu_stat which is then lazily propagated up the hierarchy on
+ * reads.
+ *
+ * When a stat gets updated, the cgroup_cpu_stat and its ancestors are
+ * linked into the updated tree. On the following read, propagation only
+ * considers and consumes the updated tree. This makes reading O(the
+ * number of descendants which have been active since last read) instead of
+ * O(the total number of descendants).
+ *
+ * This is important because there can be a lot of (draining) cgroups which
+ * aren't active and stat may be read frequently. The combination can
+ * become very expensive. By propagating selectively, increasing reading
+ * frequency decreases the cost of each read.
+ */
+struct cgroup_cpu_stat {
+ /*
+ * ->sync protects all the current counters. These are the only
+ * fields which get updated in the hot path.
+ */
+ struct u64_stats_sync sync;
+ struct task_cputime cputime;
+
+ /*
+ * Snapshots at the last reading. These are used to calculate the
+ * deltas to propagate to the global counters.
+ */
+ struct task_cputime last_cputime;
+
+ /*
+ * Child cgroups with stat updates on this cpu since the last read
+ * are linked on the parent's ->updated_children through
+ * ->updated_next.
+ *
+ * In addition to being more compact, singly-linked list pointing
+ * to the cgroup makes it unnecessary for each per-cpu struct to
+ * point back to the associated cgroup.
+ *
+ * Protected by per-cpu cgroup_cpu_stat_lock.
+ */
+ struct cgroup *updated_children; /* terminated by self cgroup */
+ struct cgroup *updated_next; /* NULL iff not on the list */
+};
+
+struct cgroup_stat {
+ /* per-cpu statistics are collected into the following global counters */
+ struct task_cputime cputime;
+ struct prev_cputime prev_cputime;
+};
+
struct cgroup {
/* self css with NULL ->ss, points back to this cgroup */
struct cgroup_subsys_state self;
@@ -354,6 +406,11 @@ struct cgroup {
*/
struct cgroup *dom_cgrp;
+ /* cgroup basic resource statistics */
+ struct cgroup_cpu_stat __percpu *cpu_stat;
+ struct cgroup_stat pending_stat; /* pending from children */
+ struct cgroup_stat stat;
+
/*
* list of pidlists, up to two for each namespace (one for procs, one
* for tasks); created on demand.
@@ -513,6 +570,8 @@ struct cgroup_subsys {
void (*css_released)(struct cgroup_subsys_state *css);
void (*css_free)(struct cgroup_subsys_state *css);
void (*css_reset)(struct cgroup_subsys_state *css);
+ int (*css_extra_stat_show)(struct seq_file *seq,
+ struct cgroup_subsys_state *css);
int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index dddbc29e2009..473e0c0abb86 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -23,6 +23,7 @@
#include <linux/nsproxy.h>
#include <linux/user_namespace.h>
#include <linux/refcount.h>
+#include <linux/kernel_stat.h>
#include <linux/cgroup-defs.h>
@@ -690,6 +691,63 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
#endif /* !CONFIG_CGROUPS */
/*
+ * Basic resource stats.
+ */
+#ifdef CONFIG_CGROUPS
+
+#ifdef CONFIG_CGROUP_CPUACCT
+void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
+#else
+static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_account_field(struct task_struct *tsk, int index,
+ u64 val) {}
+#endif
+
+void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec);
+void __cgroup_account_cputime_field(struct cgroup *cgrp,
+ enum cpu_usage_stat index, u64 delta_exec);
+
+static inline void cgroup_account_cputime(struct task_struct *task,
+ u64 delta_exec)
+{
+ struct cgroup *cgrp;
+
+ cpuacct_charge(task, delta_exec);
+
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(task);
+ if (cgroup_parent(cgrp))
+ __cgroup_account_cputime(cgrp, delta_exec);
+ rcu_read_unlock();
+}
+
+static inline void cgroup_account_cputime_field(struct task_struct *task,
+ enum cpu_usage_stat index,
+ u64 delta_exec)
+{
+ struct cgroup *cgrp;
+
+ cpuacct_account_field(task, index, delta_exec);
+
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(task);
+ if (cgroup_parent(cgrp))
+ __cgroup_account_cputime_field(cgrp, index, delta_exec);
+ rcu_read_unlock();
+}
+
+#else /* CONFIG_CGROUPS */
+
+static inline void cgroup_account_cputime(struct task_struct *task,
+ u64 delta_exec) {}
+static inline void cgroup_account_cputime_field(struct task_struct *task,
+ enum cpu_usage_stat index,
+ u64 delta_exec) {}
+
+#endif /* CONFIG_CGROUPS */
+
+/*
* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
* definition in cgroup-defs.h.
*/
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index d0677f6739f6..53f883f5a2fd 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -54,7 +54,8 @@ static inline void task_cputime_scaled(struct task_struct *t,
extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
-
+extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
+ u64 *ut, u64 *st);
/*
* Thread group CPU time accounting.