From 4835f747d3ed181bf2c67930fe06b2c01a5d2323 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:59 -0700 Subject: alloc_tag: support for page allocation tag compression Implement support for storing page allocation tag references directly in the page flags instead of page extensions. sysctl.vm.mem_profiling boot parameter it extended to provide a way for a user to request this mode. Enabling compression eliminates memory overhead caused by page_ext and results in better performance for page allocations. However this mode will not work if the number of available page flag bits is insufficient to address all kernel allocations. Such condition can happen during boot or when loading a module. If this condition is detected, memory allocation profiling gets disabled with an appropriate warning. By default compression mode is disabled. Link: https://lkml.kernel.org/r/20241023170759.999909-7-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 10 ++- include/linux/codetag.h | 3 + include/linux/page-flags-layout.h | 7 ++ include/linux/pgalloc_tag.h | 145 +++++++++++++++++++++++++++++++++----- 4 files changed, 147 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 55d30543c4c7..7c0786bdf9af 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -30,8 +30,16 @@ struct alloc_tag { struct alloc_tag_counters __percpu *counters; } __aligned(8); +struct alloc_tag_kernel_section { + struct alloc_tag *first_tag; + unsigned long count; +}; + struct alloc_tag_module_section { - unsigned long start_addr; + union { + unsigned long start_addr; + struct alloc_tag *first_tag; + }; unsigned long end_addr; /* used size */ unsigned long size; diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d10bd9810d32..d14dbd26b370 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -13,6 +13,9 @@ struct codetag_module; struct seq_buf; struct module; +#define CODETAG_SECTION_START_PREFIX "__start_" +#define CODETAG_SECTION_STOP_PREFIX "__stop_" + /* * An instance of this structure is created in a special ELF section at every * code location being tagged. At runtime, the special section is treated as diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7d79818dc065..4f5c9e979bb9 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -111,5 +111,12 @@ ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \ NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH) +#define NR_NON_PAGEFLAG_BITS (SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + \ + LAST_CPUPID_SHIFT + KASAN_TAG_WIDTH + \ + LRU_GEN_WIDTH + LRU_REFS_WIDTH) + +#define NR_UNUSED_PAGEFLAG_BITS (BITS_PER_LONG - \ + (NR_NON_PAGEFLAG_BITS + NR_PAGEFLAGS)) + #endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index b13cd3313a88..1fe63b52e5e5 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -11,29 +11,118 @@ #include +extern struct page_ext_operations page_alloc_tagging_ops; +extern unsigned long alloc_tag_ref_mask; +extern int alloc_tag_ref_offs; +extern struct alloc_tag_kernel_section kernel_tags; + +DECLARE_STATIC_KEY_FALSE(mem_profiling_compressed); + +typedef u16 pgalloc_tag_idx; + union pgtag_ref_handle { union codetag_ref *ref; /* reference in page extension */ + struct page *page; /* reference in page flags */ }; -extern struct page_ext_operations page_alloc_tagging_ops; +/* Reserved indexes */ +#define CODETAG_ID_NULL 0 +#define CODETAG_ID_EMPTY 1 +#define CODETAG_ID_FIRST 2 + +#ifdef CONFIG_MODULES + +extern struct alloc_tag_module_section module_tags; + +static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx) +{ + return &module_tags.first_tag[idx - kernel_tags.count]; +} + +static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag) +{ + return CODETAG_ID_FIRST + kernel_tags.count + (tag - module_tags.first_tag); +} + +#else /* CONFIG_MODULES */ + +static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx) +{ + pr_warn("invalid page tag reference %lu\n", (unsigned long)idx); + return NULL; +} + +static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag) +{ + pr_warn("invalid page tag 0x%lx\n", (unsigned long)tag); + return CODETAG_ID_NULL; +} + +#endif /* CONFIG_MODULES */ + +static inline void idx_to_ref(pgalloc_tag_idx idx, union codetag_ref *ref) +{ + switch (idx) { + case (CODETAG_ID_NULL): + ref->ct = NULL; + break; + case (CODETAG_ID_EMPTY): + set_codetag_empty(ref); + break; + default: + idx -= CODETAG_ID_FIRST; + ref->ct = idx < kernel_tags.count ? + &kernel_tags.first_tag[idx].ct : + &module_idx_to_tag(idx)->ct; + break; + } +} + +static inline pgalloc_tag_idx ref_to_idx(union codetag_ref *ref) +{ + struct alloc_tag *tag; + + if (!ref->ct) + return CODETAG_ID_NULL; + + if (is_codetag_empty(ref)) + return CODETAG_ID_EMPTY; + + tag = ct_to_alloc_tag(ref->ct); + if (tag >= kernel_tags.first_tag && tag < kernel_tags.first_tag + kernel_tags.count) + return CODETAG_ID_FIRST + (tag - kernel_tags.first_tag); + + return module_tag_to_idx(tag); +} + + /* Should be called only if mem_alloc_profiling_enabled() */ static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref, union pgtag_ref_handle *handle) { - struct page_ext *page_ext; - union codetag_ref *tmp; - if (!page) return false; - page_ext = page_ext_get(page); - if (!page_ext) - return false; + if (static_key_enabled(&mem_profiling_compressed)) { + pgalloc_tag_idx idx; + + idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask; + idx_to_ref(idx, ref); + handle->page = page; + } else { + struct page_ext *page_ext; + union codetag_ref *tmp; + + page_ext = page_ext_get(page); + if (!page_ext) + return false; + + tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); + ref->ct = tmp->ct; + handle->ref = tmp; + } - tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); - ref->ct = tmp->ct; - handle->ref = tmp; return true; } @@ -42,16 +131,35 @@ static inline void put_page_tag_ref(union pgtag_ref_handle handle) if (WARN_ON(!handle.ref)) return; - page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset); + if (!static_key_enabled(&mem_profiling_compressed)) + page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset); } -static inline void update_page_tag_ref(union pgtag_ref_handle handle, - union codetag_ref *ref) +static inline void update_page_tag_ref(union pgtag_ref_handle handle, union codetag_ref *ref) { - if (WARN_ON(!handle.ref || !ref)) - return; - - handle.ref->ct = ref->ct; + if (static_key_enabled(&mem_profiling_compressed)) { + struct page *page = handle.page; + unsigned long old_flags; + unsigned long flags; + unsigned long idx; + + if (WARN_ON(!page || !ref)) + return; + + idx = (unsigned long)ref_to_idx(ref); + idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs; + do { + old_flags = READ_ONCE(page->flags); + flags = old_flags; + flags &= ~(alloc_tag_ref_mask << alloc_tag_ref_offs); + flags |= idx; + } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } else { + if (WARN_ON(!handle.ref || !ref)) + return; + + handle.ref->ct = ref->ct; + } } static inline void clear_page_tag_ref(struct page *page) @@ -122,6 +230,8 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); } +void __init alloc_tag_sec_init(void); + #else /* CONFIG_MEM_ALLOC_PROFILING */ static inline void clear_page_tag_ref(struct page *page) {} @@ -130,6 +240,7 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} +static inline void alloc_tag_sec_init(void) {} #endif /* CONFIG_MEM_ALLOC_PROFILING */ -- cgit v1.2.3