diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-17 00:41:38 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-17 00:41:38 +0300 |
| commit | 9702969978695d9a699a1f34771580cdbb153b33 (patch) | |
| tree | 7f34b1e6a45f7f1131e2362420113210308cd714 | |
| parent | 45a43ac5acc90b8f4835eea92692f620e561a06b (diff) | |
| parent | 27125df9a5d3b4cfd03bce3a8ec405a368cc9aae (diff) | |
| download | linux-9702969978695d9a699a1f34771580cdbb153b33.tar.xz | |
Merge tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull more slab updates from Vlastimil Babka:
- Two stable fixes for kmalloc_nolock() usage from NMI context (Harry
Yoo)
- Allow kmalloc_nolock() allocations to be freed with kfree() and thus
also kfree_rcu() and simplify slabobj_ext handling - we no longer
need to track how it was allocated to use the matching freeing
function (Harry Yoo)
* tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
mm/slab: drop the OBJEXTS_NOSPIN_ALLOC flag from enum objext_flags
mm/slab: allow freeing kmalloc_nolock()'d objects using kfree[_rcu]()
mm/slab: use prandom if !allow_spin
mm/slab: do not access current->mems_allowed_seq if !allow_spin
| -rw-r--r-- | include/linux/memcontrol.h | 3 | ||||
| -rw-r--r-- | include/linux/rcupdate.h | 4 | ||||
| -rw-r--r-- | mm/kmemleak.c | 22 | ||||
| -rw-r--r-- | mm/slub.c | 80 |
4 files changed, 76 insertions, 33 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1baee139999f..52bfe4157623 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -359,8 +359,7 @@ enum objext_flags { * MEMCG_DATA_OBJEXTS. */ OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL, - /* slabobj_ext vector allocated with kmalloc_nolock() */ - OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG, + __OBJEXTS_FLAG_UNUSED = __FIRST_OBJEXT_FLAG, /* the next bit after the last actual flag */ __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), }; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7729fef249e1..04f3f86a4145 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1074,8 +1074,8 @@ static inline void rcu_read_unlock_migrate(void) * either fall back to use of call_rcu() or rearrange the structure to * position the rcu_head structure into the first 4096 bytes. * - * The object to be freed can be allocated either by kmalloc() or - * kmem_cache_alloc(). + * The object to be freed can be allocated either by kmalloc(), + * kmalloc_nolock(), or kmem_cache_alloc(). * * Note that the allowable offset might decrease in the future. * diff --git a/mm/kmemleak.c b/mm/kmemleak.c index fe33f2edfe07..d79acf5c5100 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -837,13 +837,12 @@ static void delete_object_full(unsigned long ptr, unsigned int objflags) struct kmemleak_object *object; object = find_and_remove_object(ptr, 0, objflags); - if (!object) { -#ifdef DEBUG - kmemleak_warn("Freeing unknown object at 0x%08lx\n", - ptr); -#endif + if (!object) + /* + * kmalloc_nolock() -> kfree() calls kmemleak_free() + * without kmemleak_alloc(). + */ return; - } __delete_object(object); } @@ -926,13 +925,12 @@ static void paint_ptr(unsigned long ptr, int color, unsigned int objflags) struct kmemleak_object *object; object = __find_and_get_object(ptr, 0, objflags); - if (!object) { - kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n", - ptr, - (color == KMEMLEAK_GREY) ? "Grey" : - (color == KMEMLEAK_BLACK) ? "Black" : "Unknown"); + if (!object) + /* + * kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore() + * without kmemleak_alloc(). + */ return; - } paint_it(object, color); put_object(object); } diff --git a/mm/slub.c b/mm/slub.c index 42df791279d9..865bc050f654 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -43,6 +43,7 @@ #include <linux/prefetch.h> #include <linux/memcontrol.h> #include <linux/random.h> +#include <linux/prandom.h> #include <kunit/test.h> #include <kunit/test-bug.h> #include <linux/sort.h> @@ -2189,8 +2190,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, virt_to_slab(vec)->slab_cache == s); new_exts = (unsigned long)vec; - if (unlikely(!allow_spin)) - new_exts |= OBJEXTS_NOSPIN_ALLOC; #ifdef CONFIG_MEMCG new_exts |= MEMCG_DATA_OBJEXTS; #endif @@ -2228,7 +2227,7 @@ retry: return 0; } -static inline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin) { struct slabobj_ext *obj_exts; @@ -2256,10 +2255,10 @@ static inline void free_slab_obj_exts(struct slab *slab) * the extension for obj_exts is expected to be NULL. */ mark_objexts_empty(obj_exts); - if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC)) - kfree_nolock(obj_exts); - else + if (allow_spin) kfree(obj_exts); + else + kfree_nolock(obj_exts); slab->obj_exts = 0; } @@ -2323,7 +2322,7 @@ static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -static inline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin) { } @@ -2584,6 +2583,24 @@ struct rcu_delayed_free { * Returns true if freeing of the object can proceed, false if its reuse * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned * to KFENCE. + * + * For objects allocated via kmalloc_nolock(), only a subset of alloc hooks + * are invoked, so some free hooks must handle asymmetric hook calls. + * + * Alloc hooks called for kmalloc_nolock(): + * - kmsan_slab_alloc() + * - kasan_slab_alloc() + * - memcg_slab_post_alloc_hook() + * - alloc_tagging_slab_alloc_hook() + * + * Free hooks that must handle missing corresponding alloc hooks: + * - kmemleak_free_recursive() + * - kfence_free() + * + * Free hooks that have no alloc hook counterpart, and thus safe to call: + * - debug_check_no_locks_freed() + * - debug_check_no_obj_freed() + * - __kcsan_check_access() */ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x, bool init, @@ -3311,8 +3328,11 @@ static void *next_freelist_entry(struct kmem_cache *s, return (char *)start + idx; } +static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state); + /* Shuffle the single linked freelist based on a random pre-computed sequence */ -static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) +static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab, + bool allow_spin) { void *start; void *cur; @@ -3323,7 +3343,19 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) return false; freelist_count = oo_objects(s->oo); - pos = get_random_u32_below(freelist_count); + if (allow_spin) { + pos = get_random_u32_below(freelist_count); + } else { + struct rnd_state *state; + + /* + * An interrupt or NMI handler might interrupt and change + * the state in the middle, but that's safe. + */ + state = &get_cpu_var(slab_rnd_state); + pos = prandom_u32_state(state) % freelist_count; + put_cpu_var(slab_rnd_state); + } page_limit = slab->objects * s->size; start = fixup_red_left(s, slab_address(slab)); @@ -3350,7 +3382,8 @@ static inline int init_cache_random_seq(struct kmem_cache *s) return 0; } static inline void init_freelist_randomization(void) { } -static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) +static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab, + bool allow_spin) { return false; } @@ -3369,14 +3402,14 @@ static __always_inline void account_slab(struct slab *slab, int order, } static __always_inline void unaccount_slab(struct slab *slab, int order, - struct kmem_cache *s) + struct kmem_cache *s, bool allow_spin) { /* * The slab object extensions should now be freed regardless of * whether mem_alloc_profiling_enabled() or not because profiling * might have been disabled after slab->obj_exts got allocated. */ - free_slab_obj_exts(slab); + free_slab_obj_exts(slab, allow_spin); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); @@ -3441,7 +3474,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) alloc_slab_obj_exts_early(s, slab); account_slab(slab, oo_order(oo), s, flags); - shuffle = shuffle_freelist(s, slab); + shuffle = shuffle_freelist(s, slab, allow_spin); if (!shuffle) { start = fixup_red_left(s, start); @@ -3480,7 +3513,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin page->mapping = NULL; __ClearPageSlab(page); mm_account_reclaimed_pages(pages); - unaccount_slab(slab, order, s); + unaccount_slab(slab, order, s, allow_spin); if (allow_spin) free_frozen_pages(page, order); else @@ -3791,6 +3824,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context * struct zone *zone; enum zone_type highest_zoneidx = gfp_zone(pc->flags); unsigned int cpuset_mems_cookie; + bool allow_spin = gfpflags_allow_spinning(pc->flags); /* * The defrag ratio allows a configuration of the tradeoffs between @@ -3815,7 +3849,15 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context * return NULL; do { - cpuset_mems_cookie = read_mems_allowed_begin(); + /* + * read_mems_allowed_begin() accesses current->mems_allowed_seq, + * a seqcount_spinlock_t that is not NMI-safe. Do not access + * current->mems_allowed_seq and avoid retry when GFP flags + * indicate spinning is not allowed. + */ + if (allow_spin) + cpuset_mems_cookie = read_mems_allowed_begin(); + zonelist = node_zonelist(mempolicy_slab_node(), pc->flags); for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) { struct kmem_cache_node *n; @@ -3839,7 +3881,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context * } } } - } while (read_mems_allowed_retry(cpuset_mems_cookie)); + } while (allow_spin && read_mems_allowed_retry(cpuset_mems_cookie)); #endif /* CONFIG_NUMA */ return NULL; } @@ -6372,7 +6414,7 @@ void kvfree_rcu_cb(struct rcu_head *head) /** * kfree - free previously allocated memory - * @object: pointer returned by kmalloc() or kmem_cache_alloc() + * @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc() * * If @object is NULL, no operation is performed. */ @@ -6391,6 +6433,7 @@ void kfree(const void *object) page = virt_to_page(object); slab = page_slab(page); if (!slab) { + /* kmalloc_nolock() doesn't support large kmalloc */ free_large_kmalloc(page, (void *)object); return; } @@ -8337,6 +8380,9 @@ void __init kmem_cache_init_late(void) flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU, 0); WARN_ON(!flushwq); +#ifdef CONFIG_SLAB_FREELIST_RANDOM + prandom_init_once(&slab_rnd_state); +#endif } int do_kmem_cache_create(struct kmem_cache *s, const char *name, |
