From bcfe06bf2622f7c4899468e427683aec49070687 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:27 -0800 Subject: mm: memcontrol: Use helpers to read page's memcg data Patch series "mm: allow mapping accounted kernel pages to userspace", v6. Currently a non-slab kernel page which has been charged to a memory cgroup can't be mapped to userspace. The underlying reason is simple: PageKmemcg flag is defined as a page type (like buddy, offline, etc), so it takes a bit from a page->mapped counter. Pages with a type set can't be mapped to userspace. But in general the kmemcg flag has nothing to do with mapping to userspace. It only means that the page has been accounted by the page allocator, so it has to be properly uncharged on release. Some bpf maps are mapping the vmalloc-based memory to userspace, and their memory can't be accounted because of this implementation detail. This patchset removes this limitation by moving the PageKmemcg flag into one of the free bits of the page->mem_cgroup pointer. Also it formalizes accesses to the page->mem_cgroup and page->obj_cgroups using new helpers, adds several checks and removes a couple of obsolete functions. As the result the code became more robust with fewer open-coded bit tricks. This patch (of 4): Currently there are many open-coded reads of the page->mem_cgroup pointer, as well as a couple of read helpers, which are barely used. It creates an obstacle on a way to reuse some bits of the pointer for storing additional bits of information. In fact, we already do this for slab pages, where the last bit indicates that a pointer has an attached vector of objcg pointers instead of a regular memcg pointer. This commits uses 2 existing helpers and introduces a new helper to converts all read sides to calls of these helpers: struct mem_cgroup *page_memcg(struct page *page); struct mem_cgroup *page_memcg_rcu(struct page *page); struct mem_cgroup *page_memcg_check(struct page *page); page_memcg_check() is intended to be used in cases when the page can be a slab page and have a memcg pointer pointing at objcg vector. It does check the lowest bit, and if set, returns NULL. page_memcg() contains a VM_BUG_ON_PAGE() check for the page not being a slab page. To make sure nobody uses a direct access, struct page's mem_cgroup/obj_cgroups is converted to unsigned long memcg_data. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Michal Hocko Link: https://lkml.kernel.org/r/20201027001657.3398190-1-guro@fb.com Link: https://lkml.kernel.org/r/20201027001657.3398190-2-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-2-guro@fb.com --- mm/slab.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'mm/slab.h') diff --git a/mm/slab.h b/mm/slab.h index 6d7c6a5056ba..e2535cee0d33 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -242,18 +242,17 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla static inline struct obj_cgroup **page_obj_cgroups(struct page *page) { /* - * page->mem_cgroup and page->obj_cgroups are sharing the same + * Page's memory cgroup and obj_cgroups vector are sharing the same * space. To distinguish between them in case we don't know for sure * that the page is a slab page (e.g. page_cgroup_ino()), let's * always set the lowest bit of obj_cgroups. */ - return (struct obj_cgroup **) - ((unsigned long)page->obj_cgroups & ~0x1UL); + return (struct obj_cgroup **)(page->memcg_data & ~0x1UL); } static inline bool page_has_obj_cgroups(struct page *page) { - return ((unsigned long)page->obj_cgroups & 0x1UL); + return page->memcg_data & 0x1UL; } int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, @@ -262,7 +261,7 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, static inline void memcg_free_page_obj_cgroups(struct page *page) { kfree(page_obj_cgroups(page)); - page->obj_cgroups = NULL; + page->memcg_data = 0; } static inline size_t obj_full_size(struct kmem_cache *s) -- cgit v1.2.3 From 270c6a71460e12b07b1dcadf7457ff95b6c6e8f4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:28 -0800 Subject: mm: memcontrol/slab: Use helpers to access slab page's memcg_data To gather all direct accesses to struct page's memcg_data field in one place, let's introduce 3 new helpers to use in the slab accounting code: struct obj_cgroup **page_objcgs(struct page *page); struct obj_cgroup **page_objcgs_check(struct page *page); bool set_page_objcgs(struct page *page, struct obj_cgroup **objcgs); They are similar to the corresponding API for generic pages, except that the setter can return false, indicating that the value has been already set from a different thread. Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Alexei Starovoitov Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20201027001657.3398190-3-guro@fb.com Link: https://lore.kernel.org/bpf/20201201215900.3569844-3-guro@fb.com --- include/linux/memcontrol.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++ mm/memcontrol.c | 6 ++--- mm/slab.h | 35 ++++++------------------- 3 files changed, 75 insertions(+), 30 deletions(-) (limited to 'mm/slab.h') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f95c1433461c..c7ac0a5b8989 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -416,6 +416,70 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)memcg_data; } +#ifdef CONFIG_MEMCG_KMEM +/* + * page_objcgs - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function assumes that the page is known to have an + * associated object cgroups vector. It's not safe to call this function + * against pages, which might have an associated memory cgroup: e.g. + * kernel stack pages. + */ +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return (struct obj_cgroup **)(READ_ONCE(page->memcg_data) & ~0x1UL); +} + +/* + * page_objcgs_check - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function is safe to use if the page can be directly associated + * with a memory cgroup. + */ +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + if (memcg_data && (memcg_data & 0x1UL)) + return (struct obj_cgroup **)(memcg_data & ~0x1UL); + + return NULL; +} + +/* + * set_page_objcgs - associate a page with a object cgroups vector + * @page: a pointer to the page struct + * @objcgs: a pointer to the object cgroups vector + * + * Atomically associates a page with a vector of object cgroups. + */ +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | 0x1UL); +} +#else +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return NULL; +} + +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + return NULL; +} + +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return true; +} +#endif + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3968d68503cb..0054b4846770 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2899,7 +2899,7 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, if (!vec) return -ENOMEM; - if (cmpxchg(&page->memcg_data, 0, (unsigned long)vec | 0x1UL)) + if (!set_page_objcgs(page, vec)) kfree(vec); else kmemleak_not_leak(vec); @@ -2933,12 +2933,12 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) * Memcg membership data for each individual object is saved in * the page->obj_cgroups. */ - if (page_has_obj_cgroups(page)) { + if (page_objcgs_check(page)) { struct obj_cgroup *objcg; unsigned int off; off = obj_to_index(page->slab_cache, page, p); - objcg = page_obj_cgroups(page)[off]; + objcg = page_objcgs(page)[off]; if (objcg) return obj_cgroup_memcg(objcg); diff --git a/mm/slab.h b/mm/slab.h index e2535cee0d33..9a54a0cb5cca 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -239,28 +239,12 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla } #ifdef CONFIG_MEMCG_KMEM -static inline struct obj_cgroup **page_obj_cgroups(struct page *page) -{ - /* - * Page's memory cgroup and obj_cgroups vector are sharing the same - * space. To distinguish between them in case we don't know for sure - * that the page is a slab page (e.g. page_cgroup_ino()), let's - * always set the lowest bit of obj_cgroups. - */ - return (struct obj_cgroup **)(page->memcg_data & ~0x1UL); -} - -static inline bool page_has_obj_cgroups(struct page *page) -{ - return page->memcg_data & 0x1UL; -} - int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, gfp_t gfp); static inline void memcg_free_page_obj_cgroups(struct page *page) { - kfree(page_obj_cgroups(page)); + kfree(page_objcgs(page)); page->memcg_data = 0; } @@ -322,7 +306,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, if (likely(p[i])) { page = virt_to_head_page(p[i]); - if (!page_has_obj_cgroups(page) && + if (!page_objcgs(page) && memcg_alloc_page_obj_cgroups(page, s, flags)) { obj_cgroup_uncharge(objcg, obj_full_size(s)); continue; @@ -330,7 +314,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, off = obj_to_index(s, page, p[i]); obj_cgroup_get(objcg); - page_obj_cgroups(page)[off] = objcg; + page_objcgs(page)[off] = objcg; mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), obj_full_size(s)); } else { @@ -344,6 +328,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, void **p, int objects) { struct kmem_cache *s; + struct obj_cgroup **objcgs; struct obj_cgroup *objcg; struct page *page; unsigned int off; @@ -357,7 +342,8 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, continue; page = virt_to_head_page(p[i]); - if (!page_has_obj_cgroups(page)) + objcgs = page_objcgs(page); + if (!objcgs) continue; if (!s_orig) @@ -366,11 +352,11 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, s = s_orig; off = obj_to_index(s, page, p[i]); - objcg = page_obj_cgroups(page)[off]; + objcg = objcgs[off]; if (!objcg) continue; - page_obj_cgroups(page)[off] = NULL; + objcgs[off] = NULL; obj_cgroup_uncharge(objcg, obj_full_size(s)); mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), -obj_full_size(s)); @@ -379,11 +365,6 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, } #else /* CONFIG_MEMCG_KMEM */ -static inline bool page_has_obj_cgroups(struct page *page) -{ - return false; -} - static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr) { return NULL; -- cgit v1.2.3