From 344736f29b359790facd0b7a521e367f1715c11c Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 20 Oct 2014 15:50:30 +0400 Subject: cpuset: simplify cpuset_node_allowed API Current cpuset API for checking if a zone/node is allowed to allocate from looks rather awkward. We have hardwall and softwall versions of cpuset_node_allowed with the softwall version doing literally the same as the hardwall version if __GFP_HARDWALL is passed to it in gfp flags. If it isn't, the softwall version may check the given node against the enclosing hardwall cpuset, which it needs to take the callback lock to do. Such a distinction was introduced by commit 02a0e53d8227 ("cpuset: rework cpuset_zone_allowed api"). Before, we had the only version with the __GFP_HARDWALL flag determining its behavior. The purpose of the commit was to avoid sleep-in-atomic bugs when someone would mistakenly call the function without the __GFP_HARDWALL flag for an atomic allocation. The suffixes introduced were intended to make the callers think before using the function. However, since the callback lock was converted from mutex to spinlock by the previous patch, the softwall check function cannot sleep, and these precautions are no longer necessary. So let's simplify the API back to the single check. Suggested-by: David Rientjes Signed-off-by: Vladimir Davydov Acked-by: Christoph Lameter Acked-by: Zefan Li Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 2f073db7392e..1b357997cac5 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -48,29 +48,16 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); void cpuset_init_current_mems_allowed(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); -extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask); -extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask); +extern int __cpuset_node_allowed(int node, gfp_t gfp_mask); -static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) +static inline int cpuset_node_allowed(int node, gfp_t gfp_mask) { - return nr_cpusets() <= 1 || - __cpuset_node_allowed_softwall(node, gfp_mask); + return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask); } -static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) +static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { - return nr_cpusets() <= 1 || - __cpuset_node_allowed_hardwall(node, gfp_mask); -} - -static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) -{ - return cpuset_node_allowed_softwall(zone_to_nid(z), gfp_mask); -} - -static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) -{ - return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask); + return cpuset_node_allowed(zone_to_nid(z), gfp_mask); } extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, @@ -179,22 +166,12 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) return 1; } -static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) -{ - return 1; -} - -static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) -{ - return 1; -} - -static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) +static inline int cpuset_node_allowed(int node, gfp_t gfp_mask) { return 1; } -static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) +static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return 1; } -- cgit v1.2.3 From 7d172cc89b8589e4173d0c73a1ddaae408f29c9d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Nov 2014 02:49:51 -0500 Subject: cgroup: add cgroup_subsys->css_released() Add a new cgroup subsys callback css_released(). This is called when the reference count of the css (cgroup_subsys_state) reaches zero before RCU scheduling free. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup.h | 1 + kernel/cgroup.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1d5196889048..e717a39f22ea 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -612,6 +612,7 @@ struct cgroup_subsys { struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css); int (*css_online)(struct cgroup_subsys_state *css); void (*css_offline)(struct cgroup_subsys_state *css); + void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index dffa54041d4a..c8558693102b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4380,6 +4380,8 @@ static void css_release_work_fn(struct work_struct *work) if (ss) { /* css release path */ cgroup_idr_remove(&ss->css_idr, css->id); + if (ss->css_released) + ss->css_released(css); } else { /* cgroup release path */ cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); -- cgit v1.2.3 From 56c807ba4e91f0980567b6a69de239677879b17f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Nov 2014 02:49:51 -0500 Subject: cgroup: add cgroup_subsys->css_e_css_changed() Add a new cgroup_subsys operatoin ->css_e_css_changed(). This is invoked if any of the effective csses seen from the css's cgroup may have changed. This will be used to implement cgroup writeback support. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup.h | 1 + kernel/cgroup.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e717a39f22ea..3a04aeb8b5a1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -615,6 +615,7 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_e_css_changed)(struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_subsys_state *css, struct cgroup_taskset *tset); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c8558693102b..69f033582a1a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2836,6 +2836,24 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } } + /* + * The effective csses of all the descendants (excluding @cgrp) may + * have changed. Subsystems can optionally subscribe to this event + * by implementing ->css_e_css_changed() which is invoked if any of + * the effective csses seen from the css's cgroup may have changed. + */ + for_each_subsys(ss, ssid) { + struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss); + struct cgroup_subsys_state *css; + + if (!ss->css_e_css_changed || !this_css) + continue; + + css_for_each_descendant_pre(css, this_css) + if (css != this_css) + ss->css_e_css_changed(css); + } + kernfs_activate(cgrp->kn); ret = 0; out_unlock: -- cgit v1.2.3 From eeecbd1971517103e06f11750dd1a9a1dc37e4e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Nov 2014 02:49:52 -0500 Subject: cgroup: implement cgroup_get_e_css() Implement cgroup_get_e_css() which finds and gets the effective css for the specified cgroup and subsystem combination. This function always returns a valid pinned css. This will be used by cgroup writeback support. While at it, add comment to cgroup_e_css() to explain why that function is different from cgroup_get_e_css() and has to test cgrp->child_subsys_mask instead of cgroup_css(cgrp, ss). Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup.h | 2 ++ kernel/cgroup.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3a04aeb8b5a1..9fd99f5e699f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -910,6 +910,8 @@ void css_task_iter_end(struct css_task_iter *it); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); +struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 69f033582a1a..bb263d0caab3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -277,6 +277,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, if (!(cgrp->root->subsys_mask & (1 << ss->id))) return NULL; + /* + * This function is used while updating css associations and thus + * can't test the csses directly. Use ->child_subsys_mask. + */ while (cgroup_parent(cgrp) && !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) cgrp = cgroup_parent(cgrp); @@ -284,6 +288,39 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, return cgroup_css(cgrp, ss); } +/** + * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem + * @cgrp: the cgroup of interest + * @ss: the subsystem of interest + * + * Find and get the effective css of @cgrp for @ss. The effective css is + * defined as the matching css of the nearest ancestor including self which + * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, + * the root css is returned, so this function always returns a valid css. + * The returned css must be put using css_put(). + */ +struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + + do { + css = cgroup_css(cgrp, ss); + + if (css && css_tryget_online(css)) + goto out_unlock; + cgrp = cgroup_parent(cgrp); + } while (cgrp); + + css = init_css_set.subsys[ss->id]; + css_get(css); +out_unlock: + rcu_read_unlock(); + return css; +} + /* convenient tests for these bits */ static inline bool cgroup_is_dead(const struct cgroup *cgrp) { -- cgit v1.2.3