diff options
Diffstat (limited to 'mm/kfence/core.c')
-rw-r--r-- | mm/kfence/core.c | 156 |
1 files changed, 142 insertions, 14 deletions
diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 84555b8233ef..51ea9193cecb 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -10,12 +10,15 @@ #include <linux/atomic.h> #include <linux/bug.h> #include <linux/debugfs.h> +#include <linux/hash.h> #include <linux/irq_work.h> +#include <linux/jhash.h> #include <linux/kcsan-checks.h> #include <linux/kfence.h> #include <linux/kmemleak.h> #include <linux/list.h> #include <linux/lockdep.h> +#include <linux/log2.h> #include <linux/memblock.h> #include <linux/moduleparam.h> #include <linux/random.h> @@ -82,6 +85,10 @@ static const struct kernel_param_ops sample_interval_param_ops = { }; module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600); +/* Pool usage% threshold when currently covered allocations are skipped. */ +static unsigned long kfence_skip_covered_thresh __read_mostly = 75; +module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644); + /* The pool of pages used for guard pages and objects. */ char *__kfence_pool __ro_after_init; EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */ @@ -106,6 +113,32 @@ DEFINE_STATIC_KEY_FALSE(kfence_allocation_key); /* Gates the allocation, ensuring only one succeeds in a given period. */ atomic_t kfence_allocation_gate = ATOMIC_INIT(1); +/* + * A Counting Bloom filter of allocation coverage: limits currently covered + * allocations of the same source filling up the pool. + * + * Assuming a range of 15%-85% unique allocations in the pool at any point in + * time, the below parameters provide a probablity of 0.02-0.33 for false + * positive hits respectively: + * + * P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)) ^ HNUM + */ +#define ALLOC_COVERED_HNUM 2 +#define ALLOC_COVERED_ORDER (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2) +#define ALLOC_COVERED_SIZE (1 << ALLOC_COVERED_ORDER) +#define ALLOC_COVERED_HNEXT(h) hash_32(h, ALLOC_COVERED_ORDER) +#define ALLOC_COVERED_MASK (ALLOC_COVERED_SIZE - 1) +static atomic_t alloc_covered[ALLOC_COVERED_SIZE]; + +/* Stack depth used to determine uniqueness of an allocation. */ +#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8) + +/* + * Randomness for stack hashes, making the same collisions across reboots and + * different machines less likely. + */ +static u32 stack_hash_seed __ro_after_init; + /* Statistics counters for debugfs. */ enum kfence_counter_id { KFENCE_COUNTER_ALLOCATED, @@ -113,6 +146,9 @@ enum kfence_counter_id { KFENCE_COUNTER_FREES, KFENCE_COUNTER_ZOMBIES, KFENCE_COUNTER_BUGS, + KFENCE_COUNTER_SKIP_INCOMPAT, + KFENCE_COUNTER_SKIP_CAPACITY, + KFENCE_COUNTER_SKIP_COVERED, KFENCE_COUNTER_COUNT, }; static atomic_long_t counters[KFENCE_COUNTER_COUNT]; @@ -122,11 +158,59 @@ static const char *const counter_names[] = { [KFENCE_COUNTER_FREES] = "total frees", [KFENCE_COUNTER_ZOMBIES] = "zombie allocations", [KFENCE_COUNTER_BUGS] = "total bugs", + [KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)", + [KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)", + [KFENCE_COUNTER_SKIP_COVERED] = "skipped allocations (covered)", }; static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT); /* === Internals ============================================================ */ +static inline bool should_skip_covered(void) +{ + unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100; + + return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh; +} + +static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries) +{ + num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH); + num_entries = filter_irq_stacks(stack_entries, num_entries); + return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed); +} + +/* + * Adds (or subtracts) count @val for allocation stack trace hash + * @alloc_stack_hash from Counting Bloom filter. + */ +static void alloc_covered_add(u32 alloc_stack_hash, int val) +{ + int i; + + for (i = 0; i < ALLOC_COVERED_HNUM; i++) { + atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]); + alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash); + } +} + +/* + * Returns true if the allocation stack trace hash @alloc_stack_hash is + * currently contained (non-zero count) in Counting Bloom filter. + */ +static bool alloc_covered_contains(u32 alloc_stack_hash) +{ + int i; + + for (i = 0; i < ALLOC_COVERED_HNUM; i++) { + if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK])) + return false; + alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash); + } + + return true; +} + static bool kfence_protect(unsigned long addr) { return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true)); @@ -184,19 +268,26 @@ static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *m * Update the object's metadata state, including updating the alloc/free stacks * depending on the state transition. */ -static noinline void metadata_update_state(struct kfence_metadata *meta, - enum kfence_object_state next) +static noinline void +metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next, + unsigned long *stack_entries, size_t num_stack_entries) { struct kfence_track *track = next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track; lockdep_assert_held(&meta->lock); - /* - * Skip over 1 (this) functions; noinline ensures we do not accidentally - * skip over the caller by never inlining. - */ - track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); + if (stack_entries) { + memcpy(track->stack_entries, stack_entries, + num_stack_entries * sizeof(stack_entries[0])); + } else { + /* + * Skip over 1 (this) functions; noinline ensures we do not + * accidentally skip over the caller by never inlining. + */ + num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); + } + track->num_stack_entries = num_stack_entries; track->pid = task_pid_nr(current); track->cpu = raw_smp_processor_id(); track->ts_nsec = local_clock(); /* Same source as printk timestamps. */ @@ -258,7 +349,9 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta, } } -static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp) +static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp, + unsigned long *stack_entries, size_t num_stack_entries, + u32 alloc_stack_hash) { struct kfence_metadata *meta = NULL; unsigned long flags; @@ -272,8 +365,10 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g list_del_init(&meta->list); } raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags); - if (!meta) + if (!meta) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]); return NULL; + } if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) { /* @@ -315,10 +410,12 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g addr = (void *)meta->addr; /* Update remaining metadata. */ - metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED); + metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries); /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */ WRITE_ONCE(meta->cache, cache); meta->size = size; + meta->alloc_stack_hash = alloc_stack_hash; + for_each_canary(meta, set_canary_byte); /* Set required struct page fields. */ @@ -331,6 +428,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g raw_spin_unlock_irqrestore(&meta->lock, flags); + alloc_covered_add(alloc_stack_hash, 1); + /* Memory initialization. */ /* @@ -395,10 +494,12 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z memzero_explicit(addr, meta->size); /* Mark the object as freed. */ - metadata_update_state(meta, KFENCE_OBJECT_FREED); + metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0); raw_spin_unlock_irqrestore(&meta->lock, flags); + alloc_covered_add(meta->alloc_stack_hash, -1); + /* Protect to detect use-after-frees. */ kfence_protect((unsigned long)addr); @@ -665,6 +766,7 @@ void __init kfence_init(void) if (!kfence_sample_interval) return; + stack_hash_seed = (u32)random_get_entropy(); if (!kfence_init_pool()) { pr_err("%s failed\n", __func__); return; @@ -740,12 +842,18 @@ void kfence_shutdown_cache(struct kmem_cache *s) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { + unsigned long stack_entries[KFENCE_STACK_DEPTH]; + size_t num_stack_entries; + u32 alloc_stack_hash; + /* * Perform size check before switching kfence_allocation_gate, so that * we don't disable KFENCE without making an allocation. */ - if (size > PAGE_SIZE) + if (size > PAGE_SIZE) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; + } /* * Skip allocations from non-default zones, including DMA. We cannot @@ -753,8 +861,10 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) * properties (e.g. reside in DMAable memory). */ if ((flags & GFP_ZONEMASK) || - (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; + } if (atomic_inc_return(&kfence_allocation_gate) > 1) return NULL; @@ -775,7 +885,25 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (!READ_ONCE(kfence_enabled)) return NULL; - return kfence_guarded_alloc(s, size, flags); + num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0); + + /* + * Do expensive check for coverage of allocation in slow-path after + * allocation_gate has already become non-zero, even though it might + * mean not making any allocation within a given sample interval. + * + * This ensures reasonable allocation coverage when the pool is almost + * full, including avoiding long-lived allocations of the same source + * filling up the pool (e.g. pagecache allocations). + */ + alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries); + if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]); + return NULL; + } + + return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries, + alloc_stack_hash); } size_t kfence_ksize(const void *addr) |