diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-16 01:29:44 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-16 01:29:44 +0300 |
commit | 22714a2ba4b55737cd7d5299db7aaf1fa8287354 (patch) | |
tree | 32b25f2e3e40732156a8a8d0dcb2ddf38410776f /include/linux | |
parent | 766ec76a27aa9dfdfee3a80f29ddc1f7539c71f9 (diff) | |
parent | 5f2e673405b742be64e7c3604ed4ed3ac14f35ce (diff) | |
download | linux-22714a2ba4b55737cd7d5299db7aaf1fa8287354.tar.xz |
Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"Cgroup2 cpu controller support is finally merged.
- Basic cpu statistics support to allow monitoring by default without
the CPU controller enabled.
- cgroup2 cpu controller support.
- /sys/kernel/cgroup files to help deal with new / optional
features"
* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: export list of cgroups v2 features using sysfs
cgroup: export list of delegatable control files using sysfs
cgroup: mark @cgrp __maybe_unused in cpu_stat_show()
MAINTAINERS: relocate cpuset.c
cgroup, sched: Move basic cpu stats from cgroup.stat to cpu.stat
sched: Implement interface for cgroup unified hierarchy
sched: Misc preps for cgroup unified hierarchy interface
sched/cputime: Add dummy cputime_adjust() implementation for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
cgroup: statically initialize init_css_set->dfl_cgrp
cgroup: Implement cgroup2 basic CPU usage accounting
cpuacct: Introduce cgroup_account_cputime[_field]()
sched/cputime: Expose cputime_adjust()
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/cgroup-defs.h | 59 | ||||
-rw-r--r-- | include/linux/cgroup.h | 58 | ||||
-rw-r--r-- | include/linux/sched/cputime.h | 3 |
3 files changed, 119 insertions, 1 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1dff0a478b45..8b7fd8eeccee 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -17,6 +17,7 @@ #include <linux/refcount.h> #include <linux/percpu-refcount.h> #include <linux/percpu-rwsem.h> +#include <linux/u64_stats_sync.h> #include <linux/workqueue.h> #include <linux/bpf-cgroup.h> @@ -255,6 +256,57 @@ struct css_set { struct rcu_head rcu_head; }; +/* + * cgroup basic resource usage statistics. Accounting is done per-cpu in + * cgroup_cpu_stat which is then lazily propagated up the hierarchy on + * reads. + * + * When a stat gets updated, the cgroup_cpu_stat and its ancestors are + * linked into the updated tree. On the following read, propagation only + * considers and consumes the updated tree. This makes reading O(the + * number of descendants which have been active since last read) instead of + * O(the total number of descendants). + * + * This is important because there can be a lot of (draining) cgroups which + * aren't active and stat may be read frequently. The combination can + * become very expensive. By propagating selectively, increasing reading + * frequency decreases the cost of each read. + */ +struct cgroup_cpu_stat { + /* + * ->sync protects all the current counters. These are the only + * fields which get updated in the hot path. + */ + struct u64_stats_sync sync; + struct task_cputime cputime; + + /* + * Snapshots at the last reading. These are used to calculate the + * deltas to propagate to the global counters. + */ + struct task_cputime last_cputime; + + /* + * Child cgroups with stat updates on this cpu since the last read + * are linked on the parent's ->updated_children through + * ->updated_next. + * + * In addition to being more compact, singly-linked list pointing + * to the cgroup makes it unnecessary for each per-cpu struct to + * point back to the associated cgroup. + * + * Protected by per-cpu cgroup_cpu_stat_lock. 
+ */ + struct cgroup *updated_children; /* terminated by self cgroup */ + struct cgroup *updated_next; /* NULL iff not on the list */ +}; + +struct cgroup_stat { + /* per-cpu statistics are collected into the folowing global counters */ + struct task_cputime cputime; + struct prev_cputime prev_cputime; +}; + struct cgroup { /* self css with NULL ->ss, points back to this cgroup */ struct cgroup_subsys_state self; @@ -354,6 +406,11 @@ struct cgroup { */ struct cgroup *dom_cgrp; + /* cgroup basic resource statistics */ + struct cgroup_cpu_stat __percpu *cpu_stat; + struct cgroup_stat pending_stat; /* pending from children */ + struct cgroup_stat stat; + /* * list of pidlists, up to two for each namespace (one for procs, one * for tasks); created on demand. @@ -513,6 +570,8 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + int (*css_extra_stat_show)(struct seq_file *seq, + struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index dddbc29e2009..473e0c0abb86 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -23,6 +23,7 @@ #include <linux/nsproxy.h> #include <linux/user_namespace.h> #include <linux/refcount.h> +#include <linux/kernel_stat.h> #include <linux/cgroup-defs.h> @@ -690,6 +691,63 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, #endif /* !CONFIG_CGROUPS */ /* + * Basic resource stats. 
+ */ +#ifdef CONFIG_CGROUPS + +#ifdef CONFIG_CGROUP_CPUACCT +void cpuacct_charge(struct task_struct *tsk, u64 cputime); +void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); +#else +static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} +static inline void cpuacct_account_field(struct task_struct *tsk, int index, + u64 val) {} +#endif + +void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec); +void __cgroup_account_cputime_field(struct cgroup *cgrp, + enum cpu_usage_stat index, u64 delta_exec); + +static inline void cgroup_account_cputime(struct task_struct *task, + u64 delta_exec) +{ + struct cgroup *cgrp; + + cpuacct_charge(task, delta_exec); + + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + if (cgroup_parent(cgrp)) + __cgroup_account_cputime(cgrp, delta_exec); + rcu_read_unlock(); +} + +static inline void cgroup_account_cputime_field(struct task_struct *task, + enum cpu_usage_stat index, + u64 delta_exec) +{ + struct cgroup *cgrp; + + cpuacct_account_field(task, index, delta_exec); + + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + if (cgroup_parent(cgrp)) + __cgroup_account_cputime_field(cgrp, index, delta_exec); + rcu_read_unlock(); +} + +#else /* CONFIG_CGROUPS */ + +static inline void cgroup_account_cputime(struct task_struct *task, + u64 delta_exec) {} +static inline void cgroup_account_cputime_field(struct task_struct *task, + enum cpu_usage_stat index, + u64 delta_exec) {} + +#endif /* CONFIG_CGROUPS */ + +/* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. 
*/ diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index d0677f6739f6..53f883f5a2fd 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -54,7 +54,8 @@ static inline void task_cputime_scaled(struct task_struct *t, extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - +extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, + u64 *ut, u64 *st); /* * Thread group CPU time accounting. |