author     Linus Torvalds <torvalds@linux-foundation.org>   2021-06-30 03:29:11 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2021-06-30 03:29:11 +0300
commit     65090f30ab791810a3dc840317e57df05018559c (patch)
tree       f417526656da37109777e89613e140ffc59228bc /mm/slub.c
parent     349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 (diff)
parent     0ed950d1f28142ccd9a9453c60df87853530d778 (diff)
download   linux-65090f30ab791810a3dc840317e57df05018559c.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"191 patches.
Subsystems affected by this patch series: kthread, ia64, scripts,
ntfs, squashfs, ocfs2, kernel/watchdog, and mm (gup, pagealloc, slab,
slub, kmemleak, dax, debug, pagecache, gup, swap, memcg, pagemap,
mprotect, bootmem, dma, tracing, vmalloc, kasan, initialization,
pagealloc, and memory-failure)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (191 commits)
mm,hwpoison: make get_hwpoison_page() call get_any_page()
mm,hwpoison: send SIGBUS with error virutal address
mm/page_alloc: split pcp->high across all online CPUs for cpuless nodes
mm/page_alloc: allow high-order pages to be stored on the per-cpu lists
mm: replace CONFIG_FLAT_NODE_MEM_MAP with CONFIG_FLATMEM
mm: replace CONFIG_NEED_MULTIPLE_NODES with CONFIG_NUMA
docs: remove description of DISCONTIGMEM
arch, mm: remove stale mentions of DISCONIGMEM
mm: remove CONFIG_DISCONTIGMEM
m68k: remove support for DISCONTIGMEM
arc: remove support for DISCONTIGMEM
arc: update comment about HIGHMEM implementation
alpha: remove DISCONTIGMEM and NUMA
mm/page_alloc: move free_the_page
mm/page_alloc: fix counting of managed_pages
mm/page_alloc: improve memmap_pages dbg msg
mm: drop SECTION_SHIFT in code comments
mm/page_alloc: introduce vm.percpu_pagelist_high_fraction
mm/page_alloc: limit the number of pages on PCP lists when reclaim is active
mm/page_alloc: scale the number of pages that are batch freed
...
Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  418
1 file changed, 252 insertions, 166 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 61bd40e3eb9a..3bc8b940c933 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -36,7 +36,9 @@
 #include <linux/prefetch.h>
 #include <linux/memcontrol.h>
 #include <linux/random.h>
+#include <kunit/test.h>
+#include <linux/debugfs.h>
 #include <trace/events/kmem.h>
 
 #include "internal.h"
@@ -117,12 +119,26 @@
  */
 
 #ifdef CONFIG_SLUB_DEBUG
+
 #ifdef CONFIG_SLUB_DEBUG_ON
 DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
 #else
 DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
-#endif
+
+static inline bool __slub_debug_enabled(void)
+{
+        return static_branch_unlikely(&slub_debug_enabled);
+}
+
+#else           /* CONFIG_SLUB_DEBUG */
+
+static inline bool __slub_debug_enabled(void)
+{
+        return false;
+}
+
+#endif          /* CONFIG_SLUB_DEBUG */
 
 static inline bool kmem_cache_debug(struct kmem_cache *s)
 {
@@ -154,9 +170,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
  * - Variable sizing of the per node arrays
  */
 
-/* Enable to test recovery from slab corruption on boot */
-#undef SLUB_RESILIENCY_TEST
-
 /* Enable to log cmpxchg failures */
 #undef SLUB_DEBUG_CMPXCHG
 
@@ -226,6 +239,12 @@ static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
                                                         { return 0; }
 #endif
 
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
+static void debugfs_slab_add(struct kmem_cache *);
+#else
+static inline void debugfs_slab_add(struct kmem_cache *s) { }
+#endif
+
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
 {
 #ifdef CONFIG_SLUB_STATS
@@ -449,6 +468,26 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
 static DEFINE_SPINLOCK(object_map_lock);
 
+#if IS_ENABLED(CONFIG_KUNIT)
+static bool slab_add_kunit_errors(void)
+{
+        struct kunit_resource *resource;
+
+        if (likely(!current->kunit_test))
+                return false;
+
+        resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
+        if (!resource)
+                return false;
+
+        (*(int *)resource->data)++;
+        kunit_put_resource(resource);
+        return true;
+}
+#else
+static inline bool slab_add_kunit_errors(void) { return false; }
+#endif
+
 /*
  * Determine a map of object in use on a page.
  *
@@ -669,16 +708,18 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
        pr_err("=============================================================================\n");
        pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
        pr_err("-----------------------------------------------------------------------------\n\n");
-
-       add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
        va_end(args);
 }
 
+__printf(2, 3)
 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 {
        struct va_format vaf;
        va_list args;
 
+       if (slab_add_kunit_errors())
+               return;
+
        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
@@ -742,8 +783,12 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 void object_err(struct kmem_cache *s, struct page *page,
                        u8 *object, char *reason)
 {
+       if (slab_add_kunit_errors())
+               return;
+
        slab_bug(s, "%s", reason);
        print_trailer(s, page, object);
+       add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
@@ -752,12 +797,16 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
        va_list args;
        char buf[100];
 
+       if (slab_add_kunit_errors())
+               return;
+
        va_start(args, fmt);
        vsnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);
        slab_bug(s, "%s", buf);
        print_page_info(page);
        dump_stack();
+       add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static void init_object(struct kmem_cache *s, void *object, u8 val)
@@ -779,7 +828,7 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
                                                void *from, void *to)
 {
-       slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
+       slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - 1, data);
        memset(from, data, to - from);
 }
 
@@ -801,12 +850,17 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
        while (end > fault && end[-1] == value)
                end--;
 
+       if (slab_add_kunit_errors())
+               goto skip_bug_print;
+
        slab_bug(s, "%s overwritten", what);
        pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
                                        fault, end - 1, fault - addr,
                                        fault[0], value);
        print_trailer(s, page, object);
+       add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 
+skip_bug_print:
        restore_bytes(s, what, value, fault, end);
        return 0;
 }
@@ -1028,13 +1082,13 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
                slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
                         page->objects, max_objects);
                page->objects = max_objects;
-               slab_fix(s, "Number of objects adjusted.");
+               slab_fix(s, "Number of objects adjusted");
        }
        if (page->inuse != page->objects - nr) {
                slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
                         page->inuse, page->objects - nr);
                page->inuse = page->objects - nr;
-               slab_fix(s, "Object count adjusted.");
+               slab_fix(s, "Object count adjusted");
        }
        return search == NULL;
 }
@@ -1398,6 +1452,8 @@ static int __init setup_slub_debug(char *str)
 out:
        if (slub_debug != 0 || slub_debug_string)
                static_branch_enable(&slub_debug_enabled);
+       else
+               static_branch_disable(&slub_debug_enabled);
        if ((static_branch_unlikely(&init_on_alloc) ||
             static_branch_unlikely(&init_on_free)) &&
            (slub_debug & SLAB_POISON))
@@ -4453,6 +4509,10 @@ void __init kmem_cache_init(void)
        if (debug_guardpage_minorder())
                slub_max_order = 0;
 
+       /* Print slub debugging pointers without hashing */
+       if (__slub_debug_enabled())
+               no_hash_pointers_enable(NULL);
+
        kmem_cache_node = &boot_kmem_cache_node;
        kmem_cache = &boot_kmem_cache;
 
@@ -4541,6 +4601,9 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
        if (err)
                __kmem_cache_release(s);
 
+       if (s->flags & SLAB_STORE_USER)
+               debugfs_slab_add(s);
+
        return err;
 }
 
@@ -4649,9 +4712,11 @@ static int validate_slab_node(struct kmem_cache *s,
                validate_slab(s, page);
                count++;
        }
-       if (count != n->nr_partial)
+       if (count != n->nr_partial) {
                pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
                       s->name, count, n->nr_partial);
+               slab_add_kunit_errors();
+       }
 
        if (!(s->flags & SLAB_STORE_USER))
                goto out;
@@ -4660,16 +4725,18 @@
                validate_slab(s, page);
                count++;
        }
-       if (count != atomic_long_read(&n->nr_slabs))
+       if (count != atomic_long_read(&n->nr_slabs)) {
                pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
                       s->name, count, atomic_long_read(&n->nr_slabs));
+               slab_add_kunit_errors();
+       }
 
 out:
        spin_unlock_irqrestore(&n->list_lock, flags);
        return count;
 }
 
-static long validate_slab_cache(struct kmem_cache *s)
+long validate_slab_cache(struct kmem_cache *s)
 {
        int node;
        unsigned long count = 0;
@@ -4681,6 +4748,9 @@ static long validate_slab_cache(struct kmem_cache *s)
        return count;
 }
+EXPORT_SYMBOL(validate_slab_cache);
+
+#ifdef CONFIG_DEBUG_FS
 /*
  * Generate lists of code addresses where slabcache objects are allocated
  * and freed.
@@ -4704,6 +4774,8 @@ struct loc_track {
        struct location *loc;
 };
 
+static struct dentry *slab_debugfs_root;
+
 static void free_loc_track(struct loc_track *t)
 {
        if (t->max)
@@ -4820,144 +4892,9 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,
                add_location(t, s, get_track(s, p, alloc));
        put_map(map);
 }
-
-static int list_locations(struct kmem_cache *s, char *buf,
-                         enum track_item alloc)
-{
-       int len = 0;
-       unsigned long i;
-       struct loc_track t = { 0, 0, NULL };
-       int node;
-       struct kmem_cache_node *n;
-
-       if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
-                            GFP_KERNEL)) {
-               return sysfs_emit(buf, "Out of memory\n");
-       }
-       /* Push back cpu slabs */
-       flush_all(s);
-
-       for_each_kmem_cache_node(s, node, n) {
-               unsigned long flags;
-               struct page *page;
-
-               if (!atomic_long_read(&n->nr_slabs))
-                       continue;
-
-               spin_lock_irqsave(&n->list_lock, flags);
-               list_for_each_entry(page, &n->partial, slab_list)
-                       process_slab(&t, s, page, alloc);
-               list_for_each_entry(page, &n->full, slab_list)
-                       process_slab(&t, s, page, alloc);
-               spin_unlock_irqrestore(&n->list_lock, flags);
-       }
-
-       for (i = 0; i < t.count; i++) {
-               struct location *l = &t.loc[i];
-
-               len += sysfs_emit_at(buf, len, "%7ld ", l->count);
-
-               if (l->addr)
-                       len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
-               else
-                       len += sysfs_emit_at(buf, len, "<not-available>");
-
-               if (l->sum_time != l->min_time)
-                       len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
-                                            l->min_time,
-                                            (long)div_u64(l->sum_time,
-                                                          l->count),
-                                            l->max_time);
-               else
-                       len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
-
-               if (l->min_pid != l->max_pid)
-                       len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
-                                            l->min_pid, l->max_pid);
-               else
-                       len += sysfs_emit_at(buf, len, " pid=%ld",
-                                            l->min_pid);
-
-               if (num_online_cpus() > 1 &&
-                   !cpumask_empty(to_cpumask(l->cpus)))
-                       len += sysfs_emit_at(buf, len, " cpus=%*pbl",
-                                            cpumask_pr_args(to_cpumask(l->cpus)));
-
-               if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
-                       len += sysfs_emit_at(buf, len, " nodes=%*pbl",
-                                            nodemask_pr_args(&l->nodes));
-
-               len += sysfs_emit_at(buf, len, "\n");
-       }
-
-       free_loc_track(&t);
-       if (!t.count)
-               len += sysfs_emit_at(buf, len, "No data\n");
-
-       return len;
-}
+#endif  /* CONFIG_DEBUG_FS */
 #endif  /* CONFIG_SLUB_DEBUG */
 
-#ifdef SLUB_RESILIENCY_TEST
-static void __init resiliency_test(void)
-{
-       u8 *p;
-       int type = KMALLOC_NORMAL;
-
-       BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
-
-       pr_err("SLUB resiliency testing\n");
-       pr_err("-----------------------\n");
-       pr_err("A. Corruption after allocation\n");
-
-       p = kzalloc(16, GFP_KERNEL);
-       p[16] = 0x12;
-       pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
-              p + 16);
-
-       validate_slab_cache(kmalloc_caches[type][4]);
-
-       /* Hmmm... The next two are dangerous */
-       p = kzalloc(32, GFP_KERNEL);
-       p[32 + sizeof(void *)] = 0x34;
-       pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
-              p);
-       pr_err("If allocated object is overwritten then not detectable\n\n");
-
-       validate_slab_cache(kmalloc_caches[type][5]);
-       p = kzalloc(64, GFP_KERNEL);
-       p += 64 + (get_cycles() & 0xff) * sizeof(void *);
-       *p = 0x56;
-       pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
-              p);
-       pr_err("If allocated object is overwritten then not detectable\n\n");
-       validate_slab_cache(kmalloc_caches[type][6]);
-
-       pr_err("\nB. Corruption after free\n");
-       p = kzalloc(128, GFP_KERNEL);
-       kfree(p);
-       *p = 0x78;
-       pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
-       validate_slab_cache(kmalloc_caches[type][7]);
-
-       p = kzalloc(256, GFP_KERNEL);
-       kfree(p);
-       p[50] = 0x9a;
-       pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
-       validate_slab_cache(kmalloc_caches[type][8]);
-
-       p = kzalloc(512, GFP_KERNEL);
-       kfree(p);
-       p[512] = 0xab;
-       pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
-       validate_slab_cache(kmalloc_caches[type][9]);
-}
-#else
-#ifdef CONFIG_SYSFS
-static void resiliency_test(void) {};
-#endif
-#endif /* SLUB_RESILIENCY_TEST */
-
 #ifdef CONFIG_SYSFS
 enum slab_stat_type {
        SL_ALL,                 /* All slabs */
@@ -5345,21 +5282,6 @@ static ssize_t validate_store(struct kmem_cache *s,
 }
 SLAB_ATTR(validate);
 
-static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
-{
-       if (!(s->flags & SLAB_STORE_USER))
-               return -ENOSYS;
-       return list_locations(s, buf, TRACK_ALLOC);
-}
-SLAB_ATTR_RO(alloc_calls);
-
-static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
-{
-       if (!(s->flags & SLAB_STORE_USER))
-               return -ENOSYS;
-       return list_locations(s, buf, TRACK_FREE);
-}
-SLAB_ATTR_RO(free_calls);
 #endif /* CONFIG_SLUB_DEBUG */
 
 #ifdef CONFIG_FAILSLAB
@@ -5523,8 +5445,6 @@ static struct attribute *slab_attrs[] = {
        &poison_attr.attr,
        &store_user_attr.attr,
        &validate_attr.attr,
-       &alloc_calls_attr.attr,
-       &free_calls_attr.attr,
 #endif
 #ifdef CONFIG_ZONE_DMA
        &cache_dma_attr.attr,
@@ -5806,13 +5726,179 @@ static int __init slab_sysfs_init(void)
        }
 
        mutex_unlock(&slab_mutex);
-       resiliency_test();
        return 0;
 }
 
 __initcall(slab_sysfs_init);
 #endif /* CONFIG_SYSFS */
 
+#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
+static int slab_debugfs_show(struct seq_file *seq, void *v)
+{
+
+       struct location *l;
+       unsigned int idx = *(unsigned int *)v;
+       struct loc_track *t = seq->private;
+
+       if (idx < t->count) {
+               l = &t->loc[idx];
+
+               seq_printf(seq, "%7ld ", l->count);
+
+               if (l->addr)
+                       seq_printf(seq, "%pS", (void *)l->addr);
+               else
+                       seq_puts(seq, "<not-available>");
+
+               if (l->sum_time != l->min_time) {
+                       seq_printf(seq, " age=%ld/%llu/%ld",
+                                  l->min_time, div_u64(l->sum_time, l->count),
+                                  l->max_time);
+               } else
+                       seq_printf(seq, " age=%ld", l->min_time);
+
+               if (l->min_pid != l->max_pid)
+                       seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
+               else
+                       seq_printf(seq, " pid=%ld",
+                                  l->min_pid);
+
+               if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
+                       seq_printf(seq, " cpus=%*pbl",
+                                  cpumask_pr_args(to_cpumask(l->cpus)));
+
+               if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
+                       seq_printf(seq, " nodes=%*pbl",
+                                  nodemask_pr_args(&l->nodes));
+
+               seq_puts(seq, "\n");
+       }
+
+       if (!idx && !t->count)
+               seq_puts(seq, "No data\n");
+
+       return 0;
+}
+
+static void slab_debugfs_stop(struct seq_file *seq, void *v)
+{
+}
+
+static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
+{
+       struct loc_track *t = seq->private;
+
+       v = ppos;
+       ++*ppos;
+       if (*ppos <= t->count)
+               return v;
+
+       return NULL;
+}
+
+static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
+{
+       return ppos;
+}
+
+static const struct seq_operations slab_debugfs_sops = {
+       .start  = slab_debugfs_start,
+       .next   = slab_debugfs_next,
+       .stop   = slab_debugfs_stop,
+       .show   = slab_debugfs_show,
+};
+
+static int slab_debug_trace_open(struct inode *inode, struct file *filep)
+{
+
+       struct kmem_cache_node *n;
+       enum track_item alloc;
+       int node;
+       struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
+                                               sizeof(struct loc_track));
+       struct kmem_cache *s = file_inode(filep)->i_private;
+
+       if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
+               alloc = TRACK_ALLOC;
+       else
+               alloc = TRACK_FREE;
+
+       if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL))
+               return -ENOMEM;
+
+       /* Push back cpu slabs */
+       flush_all(s);
+
+       for_each_kmem_cache_node(s, node, n) {
+               unsigned long flags;
+               struct page *page;
+
+               if (!atomic_long_read(&n->nr_slabs))
+                       continue;
+
+               spin_lock_irqsave(&n->list_lock, flags);
+               list_for_each_entry(page, &n->partial, slab_list)
+                       process_slab(t, s, page, alloc);
+               list_for_each_entry(page, &n->full, slab_list)
+                       process_slab(t, s, page, alloc);
+               spin_unlock_irqrestore(&n->list_lock, flags);
+       }
+
+       return 0;
+}
+
+static int slab_debug_trace_release(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq = file->private_data;
+       struct loc_track *t = seq->private;
+
+       free_loc_track(t);
+       return seq_release_private(inode, file);
+}
+
+static const struct file_operations slab_debugfs_fops = {
+       .open    = slab_debug_trace_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = slab_debug_trace_release,
+};
+
+static void debugfs_slab_add(struct kmem_cache *s)
+{
+       struct dentry *slab_cache_dir;
+
+       if (unlikely(!slab_debugfs_root))
+               return;
+
+       slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
+
+       debugfs_create_file("alloc_traces", 0400,
+               slab_cache_dir, s, &slab_debugfs_fops);
+
+       debugfs_create_file("free_traces", 0400,
+               slab_cache_dir, s, &slab_debugfs_fops);
+}
+
+void debugfs_slab_release(struct kmem_cache *s)
+{
+       debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
+}
+
+static int __init slab_debugfs_init(void)
+{
+       struct kmem_cache *s;
+
+       slab_debugfs_root = debugfs_create_dir("slab", NULL);
+
+       list_for_each_entry(s, &slab_caches, list)
+               if (s->flags & SLAB_STORE_USER)
+                       debugfs_slab_add(s);
+
+       return 0;
+
+}
+__initcall(slab_debugfs_init);
+#endif
 /*
  * The /proc/slabinfo ABI
  */
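Note on the KUnit hook above: when SLUB's consistency checks fire inside a KUnit test, slab_add_kunit_errors() suppresses the usual report and taint and instead increments an int registered as a KUnit named resource called "slab_errors". The series this comes from adds an in-tree consumer for it (lib/slub_kunit.c, behind CONFIG_SLUB_KUNIT_TEST); the fragment below is only a minimal sketch of that pattern, not the merged test: the cache name, suite name, and the relaxed "at least one error" check are placeholder choices.

/* Illustrative sketch only: register the "slab_errors" named resource that
 * mm/slub.c looks up, deliberately overwrite a redzone, and let the now
 * exported validate_slab_cache() count the resulting reports.
 */
#include <kunit/test.h>
#include <linux/kasan.h>
#include <linux/module.h>
#include <linux/slab.h>

long validate_slab_cache(struct kmem_cache *s);  /* exported by the patch above */

static struct kunit_resource resource;
static int slab_errors;

static int demo_init(struct kunit *test)
{
        slab_errors = 0;
        /* Name must match kunit_find_named_resource(..., "slab_errors") in mm/slub.c. */
        return kunit_add_named_resource(test, NULL, NULL, &resource,
                                        "slab_errors", &slab_errors);
}

static void demo_clobber_redzone(struct kunit *test)
{
        struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_demo", 64, 0,
                                                 SLAB_RED_ZONE, NULL);
        u8 *p = kmem_cache_alloc(s, GFP_KERNEL);

        kasan_disable_current();        /* let SLUB, not KASAN, catch it */
        p[64] = 0x12;                   /* write one byte past the object */

        validate_slab_cache(s);         /* each detected error bumps slab_errors */
        KUNIT_EXPECT_NE(test, 0, slab_errors);

        kasan_enable_current();
        kmem_cache_free(s, p);
        kmem_cache_destroy(s);
}

static struct kunit_case demo_cases[] = {
        KUNIT_CASE(demo_clobber_redzone),
        {}
};

static struct kunit_suite demo_suite = {
        .name = "slub_debug_demo",
        .init = demo_init,
        .test_cases = demo_cases,
};
kunit_test_suite(demo_suite);

MODULE_LICENSE("GPL");

On the debugfs side, for caches created with SLAB_STORE_USER (for example after booting with slub_debug=U), the allocation and free call sites that used to be exposed via /sys/kernel/slab/<cache>/alloc_calls and free_calls are now read from slab/<cache>/alloc_traces and free_traces under the debugfs mount (typically /sys/kernel/debug), created 0400 and therefore root-only.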