diff options
Diffstat (limited to 'kernel/cgroup')
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 2 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 58 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 54 | ||||
-rw-r--r-- | kernel/cgroup/cpuset.c | 13 | ||||
-rw-r--r-- | kernel/cgroup/pids.c | 4 | ||||
-rw-r--r-- | kernel/cgroup/rdma.c | 5 | ||||
-rw-r--r-- | kernel/cgroup/rstat.c | 10 |
7 files changed, 58 insertions, 88 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c950864016e2..c9a35f09e4b9 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -198,7 +198,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags); +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct cgroup_root *root, unsigned long magic, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 583b969b0c0e..f94a7229974e 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1116,13 +1116,11 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, void *data, unsigned long magic, struct cgroup_namespace *ns) { - struct super_block *pinned_sb = NULL; struct cgroup_sb_opts opts; struct cgroup_root *root; struct cgroup_subsys *ss; struct dentry *dentry; int i, ret; - bool new_root = false; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); @@ -1184,29 +1182,6 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, if (root->flags ^ opts.flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); - /* - * We want to reuse @root whose lifetime is governed by its - * ->cgrp. Let's check whether @root is alive and keep it - * that way. As cgroup_kill_sb() can happen anytime, we - * want to block it by pinning the sb so that @root doesn't - * get killed before mount is complete. - * - * With the sb pinned, tryget_live can reliably indicate - * whether @root can be reused. If it's being killed, - * drain it. We can use wait_queue for the wait but this - * path is super cold. Let's just sleep a bit and retry. - */ - pinned_sb = kernfs_pin_sb(root->kf_root, NULL); - if (IS_ERR(pinned_sb) || - !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { - mutex_unlock(&cgroup_mutex); - if (!IS_ERR_OR_NULL(pinned_sb)) - deactivate_super(pinned_sb); - msleep(10); - ret = restart_syscall(); - goto out_free; - } - ret = 0; goto out_unlock; } @@ -1232,15 +1207,20 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ret = -ENOMEM; goto out_unlock; } - new_root = true; init_cgroup_root(root, &opts); - ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD); + ret = cgroup_setup_root(root, opts.subsys_mask); if (ret) cgroup_free_root(root); out_unlock: + if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { + mutex_unlock(&cgroup_mutex); + msleep(10); + ret = restart_syscall(); + goto out_free; + } mutex_unlock(&cgroup_mutex); out_free: kfree(opts.release_agent); @@ -1252,25 +1232,13 @@ out_free: dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, CGROUP_SUPER_MAGIC, ns); - /* - * There's a race window after we release cgroup_mutex and before - * allocating a superblock. Make sure a concurrent process won't - * be able to re-use the root during this window by delaying the - * initialization of root refcnt. - */ - if (new_root) { - mutex_lock(&cgroup_mutex); - percpu_ref_reinit(&root->cgrp.self.refcnt); - mutex_unlock(&cgroup_mutex); + if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + struct super_block *sb = dentry->d_sb; + dput(dentry); + deactivate_locked_super(sb); + msleep(10); + dentry = ERR_PTR(restart_syscall()); } - - /* - * If @pinned_sb, we're reusing an existing root and holding an - * extra ref on its sb. Mount is complete. Put the extra ref. - */ - if (pinned_sb) - deactivate_super(pinned_sb); - return dentry; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f31bd61c9466..eef24a25bda7 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1; */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; -static u16 have_free_callback __read_mostly; +static u16 have_release_callback __read_mostly; static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ @@ -1927,7 +1927,7 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -1944,7 +1944,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) root_cgrp->ancestor_ids[0] = ret; ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, - ref_flags, GFP_KERNEL); + 0, GFP_KERNEL); if (ret) goto out; @@ -2033,7 +2033,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct cgroup_namespace *ns) { struct dentry *dentry; - bool new_sb; + bool new_sb = false; dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); @@ -2043,6 +2043,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, */ if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { struct dentry *nsdentry; + struct super_block *sb = dentry->d_sb; struct cgroup *cgrp; mutex_lock(&cgroup_mutex); @@ -2053,12 +2054,14 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); - nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb); + nsdentry = kernfs_node_dentry(cgrp->kn, sb); dput(dentry); + if (IS_ERR(nsdentry)) + deactivate_locked_super(sb); dentry = nsdentry; } - if (IS_ERR(dentry) || !new_sb) + if (!new_sb) cgroup_put(&root->cgrp); return dentry; @@ -2118,18 +2121,16 @@ static void cgroup_kill_sb(struct super_block *sb) struct cgroup_root *root = cgroup_root_from_kf(kf_root); /* - * If @root doesn't have any mounts or children, start killing it. + * If @root doesn't have any children, start killing it. * This prevents new mounts by disabling percpu_ref_tryget_live(). * cgroup_mount() may wait for @root's release. * * And don't kill the default root. */ - if (!list_empty(&root->cgrp.self.children) || - root == &cgrp_dfl_root) - cgroup_put(&root->cgrp); - else + if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root && + !percpu_ref_is_dying(&root->cgrp.self.refcnt)) percpu_ref_kill(&root->cgrp.self.refcnt); - + cgroup_put(&root->cgrp); kernfs_kill_sb(sb); } @@ -3533,6 +3534,16 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, return ret ?: nbytes; } +static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt) +{ + struct cftype *cft = of->kn->priv; + + if (cft->poll) + return cft->poll(of, pt); + + return kernfs_generic_poll(of, pt); +} + static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos) { return seq_cft(seq)->seq_start(seq, ppos); @@ -3571,6 +3582,7 @@ static struct kernfs_ops cgroup_kf_single_ops = { .open = cgroup_file_open, .release = cgroup_file_release, .write = cgroup_file_write, + .poll = cgroup_file_poll, .seq_show = cgroup_seqfile_show, }; @@ -3579,6 +3591,7 @@ static struct kernfs_ops cgroup_kf_ops = { .open = cgroup_file_open, .release = cgroup_file_release, .write = cgroup_file_write, + .poll = cgroup_file_poll, .seq_start = cgroup_seqfile_start, .seq_next = cgroup_seqfile_next, .seq_stop = cgroup_seqfile_stop, @@ -5313,7 +5326,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; - have_free_callback |= (bool)ss->free << ss->id; + have_release_callback |= (bool)ss->release << ss->id; have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been @@ -5399,7 +5412,7 @@ int __init cgroup_init(void) hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys)); - BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0)); + BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); mutex_unlock(&cgroup_mutex); @@ -5749,16 +5762,19 @@ void cgroup_exit(struct task_struct *tsk) } while_each_subsys_mask(); } -void cgroup_free(struct task_struct *task) +void cgroup_release(struct task_struct *task) { - struct css_set *cset = task_css_set(task); struct cgroup_subsys *ss; int ssid; - do_each_subsys_mask(ss, ssid, have_free_callback) { - ss->free(task); + do_each_subsys_mask(ss, ssid, have_release_callback) { + ss->release(task); } while_each_subsys_mask(); +} +void cgroup_free(struct task_struct *task) +{ + struct css_set *cset = task_css_set(task); put_css_set(cset); } @@ -5996,7 +6012,7 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_detach(cgrp, prog, type, flags); + ret = __cgroup_bpf_detach(cgrp, prog, type); mutex_unlock(&cgroup_mutex); return ret; } diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 479743db6c37..72afd55f70c6 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -203,19 +203,6 @@ static inline struct cpuset *parent_cs(struct cpuset *cs) return css_cs(cs->css.parent); } -#ifdef CONFIG_NUMA -static inline bool task_has_mempolicy(struct task_struct *task) -{ - return task->mempolicy; -} -#else -static inline bool task_has_mempolicy(struct task_struct *task) -{ - return false; -} -#endif - - /* bits in struct cpuset flags field */ typedef enum { CS_ONLINE, diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 9829c67ebc0a..c9960baaa14f 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task) pids_uncharge(pids, 1); } -static void pids_free(struct task_struct *task) +static void pids_release(struct task_struct *task) { struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id)); @@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = { .cancel_attach = pids_cancel_attach, .can_fork = pids_can_fork, .cancel_fork = pids_cancel_fork, - .free = pids_free, + .release = pids_release, .legacy_cftypes = pids_files, .dfl_cftypes = pids_files, .threaded = true, diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index d3bbb757ee49..1d75ae7f1cb7 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -313,10 +313,8 @@ EXPORT_SYMBOL(rdmacg_try_charge); * If IB stack wish a device to participate in rdma cgroup resource * tracking, it must invoke this API to register with rdma cgroup before * any user space application can start using the RDMA resources. - * Returns 0 on success or EINVAL when table length given is beyond - * supported size. */ -int rdmacg_register_device(struct rdmacg_device *device) +void rdmacg_register_device(struct rdmacg_device *device) { INIT_LIST_HEAD(&device->dev_node); INIT_LIST_HEAD(&device->rpools); @@ -324,7 +322,6 @@ int rdmacg_register_device(struct rdmacg_device *device) mutex_lock(&rdmacg_mutex); list_add_tail(&device->dev_node, &rdmacg_devices); mutex_unlock(&rdmacg_mutex); - return 0; } EXPORT_SYMBOL(rdmacg_register_device); diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index d503d1a9007c..bb95a35e8c2d 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -87,7 +87,6 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, struct cgroup *root, int cpu) { struct cgroup_rstat_cpu *rstatc; - struct cgroup *parent; if (pos == root) return NULL; @@ -115,8 +114,8 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * However, due to the way we traverse, @pos will be the first * child in most cases. The only exception is @root. */ - parent = cgroup_parent(pos); - if (parent && rstatc->updated_next) { + if (rstatc->updated_next) { + struct cgroup *parent = cgroup_parent(pos); struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu); struct cgroup_rstat_cpu *nrstatc; struct cgroup **nextp; @@ -140,9 +139,12 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * updated stat. */ smp_mb(); + + return pos; } - return pos; + /* only happens for @root */ + return NULL; } /* see cgroup_rstat_flush() */ |