diff options
author | Roman Gushchin <guro@fb.com> | 2020-08-07 09:21:10 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-07 21:33:25 +0300 |
commit | 9855609bde03e2472b99a95e869d29ee1e78a751 (patch) | |
tree | efc534e04c3e1acbc1dcb4dcb681c30bd072fc59 /mm/memcontrol.c | |
parent | 0f876e4dc55db5fafef774917fd66e1373c0f390 (diff) | |
download | linux-9855609bde03e2472b99a95e869d29ee1e78a751.tar.xz |
mm: memcg/slab: use a single set of kmem_caches for all accounted allocations
This is fairly big but mostly red patch, which makes all accounted slab
allocations use a single set of kmem_caches instead of creating a separate
set for each memory cgroup.
Because the number of non-root kmem_caches is now capped by the number of
root kmem_caches, there is no need to shrink or destroy them prematurely.
They can be perfectly destroyed together with their root counterparts.
This allows to dramatically simplify the management of non-root
kmem_caches and delete a ton of code.
This patch performs the following changes:
1) introduces memcg_params.memcg_cache pointer to represent the
kmem_cache which will be used for all non-root allocations
2) reuses the existing memcg kmem_cache creation mechanism
to create memcg kmem_cache on the first allocation attempt
3) memcg kmem_caches are named <kmemcache_name>-memcg,
e.g. dentry-memcg
4) simplifies memcg_kmem_get_cache() to just return memcg kmem_cache
or schedule it's creation and return the root cache
5) removes almost all non-root kmem_cache management code
(separate refcounter, reparenting, shrinking, etc)
6) makes slab debugfs to display root_mem_cgroup css id and never
show :dead and :deact flags in the memcg_slabinfo attribute.
Following patches in the series will simplify the kmem_cache creation.
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-13-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 163 |
1 files changed, 32 insertions, 131 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5cb2a588cc10..874704c4a48a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -350,7 +350,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, } /* - * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. + * This will be used as a shrinker list's index. * The main reason for not using cgroup id for this: * this works better in sparse environments, where we have a lot of memcgs, * but only a few kmem-limited. Or also, if we have, for instance, 200 @@ -569,20 +569,16 @@ ino_t page_cgroup_ino(struct page *page) unsigned long ino = 0; rcu_read_lock(); - if (PageSlab(page) && !PageTail(page)) { - memcg = memcg_from_slab_page(page); - } else { - memcg = page->mem_cgroup; + memcg = page->mem_cgroup; - /* - * The lowest bit set means that memcg isn't a valid - * memcg pointer, but a obj_cgroups pointer. - * In this case the page is shared and doesn't belong - * to any specific memory cgroup. - */ - if ((unsigned long) memcg & 0x1UL) - memcg = NULL; - } + /* + * The lowest bit set means that memcg isn't a valid + * memcg pointer, but a obj_cgroups pointer. + * In this case the page is shared and doesn't belong + * to any specific memory cgroup. + */ + if ((unsigned long) memcg & 0x1UL) + memcg = NULL; while (memcg && !(memcg->css.flags & CSS_ONLINE)) memcg = parent_mem_cgroup(memcg); @@ -2822,12 +2818,18 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) page = virt_to_head_page(p); /* - * Slab pages don't have page->mem_cgroup set because corresponding - * kmem caches can be reparented during the lifetime. That's why - * memcg_from_slab_page() should be used instead. + * Slab objects are accounted individually, not per-page. + * Memcg membership data for each individual object is saved in + * the page->obj_cgroups. */ - if (PageSlab(page)) - return memcg_from_slab_page(page); + if (page_has_obj_cgroups(page)) { + struct obj_cgroup *objcg; + unsigned int off; + + off = obj_to_index(page->slab_cache, page, p); + objcg = page_obj_cgroups(page)[off]; + return obj_cgroup_memcg(objcg); + } /* All other pages use page->mem_cgroup */ return page->mem_cgroup; @@ -2882,9 +2884,7 @@ static int memcg_alloc_cache_id(void) else if (size > MEMCG_CACHES_MAX_SIZE) size = MEMCG_CACHES_MAX_SIZE; - err = memcg_update_all_caches(size); - if (!err) - err = memcg_update_all_list_lrus(size); + err = memcg_update_all_list_lrus(size); if (!err) memcg_nr_cache_ids = size; @@ -2903,7 +2903,6 @@ static void memcg_free_cache_id(int id) } struct memcg_kmem_cache_create_work { - struct mem_cgroup *memcg; struct kmem_cache *cachep; struct work_struct work; }; @@ -2912,33 +2911,24 @@ static void memcg_kmem_cache_create_func(struct work_struct *w) { struct memcg_kmem_cache_create_work *cw = container_of(w, struct memcg_kmem_cache_create_work, work); - struct mem_cgroup *memcg = cw->memcg; struct kmem_cache *cachep = cw->cachep; - memcg_create_kmem_cache(memcg, cachep); + memcg_create_kmem_cache(cachep); - css_put(&memcg->css); kfree(cw); } /* * Enqueue the creation of a per-memcg kmem_cache. */ -static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, - struct kmem_cache *cachep) +static void memcg_schedule_kmem_cache_create(struct kmem_cache *cachep) { struct memcg_kmem_cache_create_work *cw; - if (!css_tryget_online(&memcg->css)) - return; - cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN); - if (!cw) { - css_put(&memcg->css); + if (!cw) return; - } - cw->memcg = memcg; cw->cachep = cachep; INIT_WORK(&cw->work, memcg_kmem_cache_create_func); @@ -2946,102 +2936,26 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, } /** - * memcg_kmem_get_cache: select the correct per-memcg cache for allocation + * memcg_kmem_get_cache: select memcg or root cache for allocation * @cachep: the original global kmem cache * * Return the kmem_cache we're supposed to use for a slab allocation. - * We try to use the current memcg's version of the cache. * * If the cache does not exist yet, if we are the first user of it, we * create it asynchronously in a workqueue and let the current allocation * go through with the original cache. - * - * This function takes a reference to the cache it returns to assure it - * won't get destroyed while we are working with it. Once the caller is - * done with it, memcg_kmem_put_cache() must be called to release the - * reference. */ -struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep, - struct obj_cgroup **objcgp) +struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep) { - struct mem_cgroup *memcg; struct kmem_cache *memcg_cachep; - struct memcg_cache_array *arr; - int kmemcg_id; - VM_BUG_ON(!is_root_cache(cachep)); - - if (memcg_kmem_bypass()) + memcg_cachep = READ_ONCE(cachep->memcg_params.memcg_cache); + if (unlikely(!memcg_cachep)) { + memcg_schedule_kmem_cache_create(cachep); return cachep; - - rcu_read_lock(); - - if (unlikely(current->active_memcg)) - memcg = current->active_memcg; - else - memcg = mem_cgroup_from_task(current); - - if (!memcg || memcg == root_mem_cgroup) - goto out_unlock; - - kmemcg_id = READ_ONCE(memcg->kmemcg_id); - if (kmemcg_id < 0) - goto out_unlock; - - arr = rcu_dereference(cachep->memcg_params.memcg_caches); - - /* - * Make sure we will access the up-to-date value. The code updating - * memcg_caches issues a write barrier to match the data dependency - * barrier inside READ_ONCE() (see memcg_create_kmem_cache()). - */ - memcg_cachep = READ_ONCE(arr->entries[kmemcg_id]); - - /* - * If we are in a safe context (can wait, and not in interrupt - * context), we could be be predictable and return right away. - * This would guarantee that the allocation being performed - * already belongs in the new cache. - * - * However, there are some clashes that can arrive from locking. - * For instance, because we acquire the slab_mutex while doing - * memcg_create_kmem_cache, this means no further allocation - * could happen with the slab_mutex held. So it's better to - * defer everything. - * - * If the memcg is dying or memcg_cache is about to be released, - * don't bother creating new kmem_caches. Because memcg_cachep - * is ZEROed as the fist step of kmem offlining, we don't need - * percpu_ref_tryget_live() here. css_tryget_online() check in - * memcg_schedule_kmem_cache_create() will prevent us from - * creation of a new kmem_cache. - */ - if (unlikely(!memcg_cachep)) - memcg_schedule_kmem_cache_create(memcg, cachep); - else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt)) { - struct obj_cgroup *objcg = rcu_dereference(memcg->objcg); - - if (!objcg || !obj_cgroup_tryget(objcg)) { - percpu_ref_put(&memcg_cachep->memcg_params.refcnt); - goto out_unlock; - } - - *objcgp = objcg; - cachep = memcg_cachep; } -out_unlock: - rcu_read_unlock(); - return cachep; -} -/** - * memcg_kmem_put_cache: drop reference taken by memcg_kmem_get_cache - * @cachep: the cache returned by memcg_kmem_get_cache - */ -void memcg_kmem_put_cache(struct kmem_cache *cachep) -{ - if (!is_root_cache(cachep)) - percpu_ref_put(&cachep->memcg_params.refcnt); + return memcg_cachep; } /** @@ -3731,7 +3645,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) */ memcg->kmemcg_id = memcg_id; memcg->kmem_state = KMEM_ONLINE; - INIT_LIST_HEAD(&memcg->kmem_caches); return 0; } @@ -3744,22 +3657,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) if (memcg->kmem_state != KMEM_ONLINE) return; - /* - * Clear the online state before clearing memcg_caches array - * entries. The slab_mutex in memcg_deactivate_kmem_caches() - * guarantees that no cache will be created for this cgroup - * after we are done (see memcg_create_kmem_cache()). - */ + memcg->kmem_state = KMEM_ALLOCATED; parent = parent_mem_cgroup(memcg); if (!parent) parent = root_mem_cgroup; - /* - * Deactivate and reparent kmem_caches and objcgs. - */ - memcg_deactivate_kmem_caches(memcg, parent); memcg_reparent_objcgs(memcg, parent); kmemcg_id = memcg->kmemcg_id; @@ -5384,9 +5288,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) /* The following stuff does not apply to the root */ if (!parent) { -#ifdef CONFIG_MEMCG_KMEM - INIT_LIST_HEAD(&memcg->kmem_caches); -#endif root_mem_cgroup = memcg; return &memcg->css; } |