summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/dev-tools/kasan.rst3
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/libfs.c3
-rw-r--r--include/linux/bitmap.h3
-rw-r--r--include/linux/page_ext.h10
-rw-r--r--include/linux/xarray.h4
-rw-r--r--lib/generic-radix-tree.c32
-rw-r--r--lib/test_meminit.c27
-rw-r--r--mm/compaction.c7
-rw-r--r--mm/memory-failure.c22
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/page_ext.c23
-rw-r--r--mm/page_owner.c55
-rw-r--r--mm/slab.c3
-rw-r--r--mm/slub.c35
16 files changed, 152 insertions, 86 deletions
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index b72d07d70239..525296121d89 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -41,6 +41,9 @@ smaller binary while the latter is 1.1 - 2 times faster.
Both KASAN modes work with both SLUB and SLAB memory allocators.
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
+To augment reports with last allocation and freeing stack of the physical page,
+it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on.
+
To disable instrumentation for specific files or directories, add a line
similar to the following to the respective kernel Makefile:
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ae196784f487..9329ced91f1d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -241,9 +241,8 @@ void dio_warn_stale_pagecache(struct file *filp)
}
}
-/**
+/*
* dio_complete() - called when all DIO BIO I/O has been completed
- * @offset: the byte offset in the file of the completed operation
*
* This drops i_dio_count, lets interested parties know that a DIO operation
* has completed, and calculates the resulting return code for the operation.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e88421d9a48d..8461a6322039 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -905,7 +905,7 @@ restart:
* cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
* @bdi_id: target bdi id
* @memcg_id: target memcg css id
- * @nr_pages: number of pages to write, 0 for best-effort dirty flushing
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
* @reason: reason why some writeback work initiated
* @done: target wb_completion
*
diff --git a/fs/libfs.c b/fs/libfs.c
index 540611b99b9a..1463b038ffc4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -473,8 +473,7 @@ EXPORT_SYMBOL(simple_write_begin);
/**
* simple_write_end - .write_end helper for non-block-device FSes
- * @available: See .write_end of address_space_operations
- * @file: "
+ * @file: See .write_end of address_space_operations
* @mapping: "
* @pos: "
* @len: "
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 90528f12bdfa..29fc933df3bf 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -326,10 +326,11 @@ static inline int bitmap_equal(const unsigned long *src1,
}
/**
- * bitmap_or_equal - Check whether the or of two bitnaps is equal to a third
+ * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third
* @src1: Pointer to bitmap 1
* @src2: Pointer to bitmap 2 will be or'ed with bitmap 1
* @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
+ * @nbits: number of bits in each of these bitmaps
*
* Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
*/
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index 682fd465df06..cfce186f0c4e 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -18,7 +18,7 @@ struct page_ext_operations {
enum page_ext_flags {
PAGE_EXT_OWNER,
- PAGE_EXT_OWNER_ACTIVE,
+ PAGE_EXT_OWNER_ALLOCATED,
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,
@@ -36,6 +36,7 @@ struct page_ext {
unsigned long flags;
};
+extern unsigned long page_ext_size;
extern void pgdat_page_ext_init(struct pglist_data *pgdat);
#ifdef CONFIG_SPARSEMEM
@@ -52,6 +53,13 @@ static inline void page_ext_init(void)
struct page_ext *lookup_page_ext(const struct page *page);
+static inline struct page_ext *page_ext_next(struct page_ext *curr)
+{
+ void *next = curr;
+ next += page_ext_size;
+ return next;
+}
+
#else /* !CONFIG_PAGE_EXTENSION */
struct page_ext;
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 5921599b6dc4..86eecbd98e84 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -230,8 +230,8 @@ static inline int xa_err(void *entry)
* This structure is used either directly or via the XA_LIMIT() macro
* to communicate the range of IDs that are valid for allocation.
* Two common ranges are predefined for you:
- * * xa_limit_32b - [0 - UINT_MAX]
- * * xa_limit_31b - [0 - INT_MAX]
+ * * xa_limit_32b - [0 - UINT_MAX]
+ * * xa_limit_31b - [0 - INT_MAX]
*/
struct xa_limit {
u32 max;
diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c
index ae25e2fa2187..f25eb111c051 100644
--- a/lib/generic-radix-tree.c
+++ b/lib/generic-radix-tree.c
@@ -2,6 +2,7 @@
#include <linux/export.h>
#include <linux/generic-radix-tree.h>
#include <linux/gfp.h>
+#include <linux/kmemleak.h>
#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *))
#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
@@ -75,6 +76,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset)
}
EXPORT_SYMBOL(__genradix_ptr);
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+ struct genradix_node *node;
+
+ node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+ /*
+ * We're using pages (not slab allocations) directly for kernel data
+ * structures, so we need to explicitly inform kmemleak of them in order
+ * to avoid false positive memory leak reports.
+ */
+ kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+ return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+ kmemleak_free(node);
+ free_page((unsigned long)node);
+}
+
/*
* Returns pointer to the specified byte @offset within @radix, allocating it if
* necessary - newly allocated slots are always zeroed out:
@@ -97,8 +119,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
break;
if (!new_node) {
- new_node = (void *)
- __get_free_page(gfp_mask|__GFP_ZERO);
+ new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
@@ -121,8 +142,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
n = READ_ONCE(*p);
if (!n) {
if (!new_node) {
- new_node = (void *)
- __get_free_page(gfp_mask|__GFP_ZERO);
+ new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
@@ -133,7 +153,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
}
if (new_node)
- free_page((unsigned long) new_node);
+ genradix_free_node(new_node);
return &n->data[offset];
}
@@ -191,7 +211,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level)
genradix_free_recurse(n->children[i], level - 1);
}
- free_page((unsigned long) n);
+ genradix_free_node(n);
}
int __genradix_prealloc(struct __genradix *radix, size_t size,
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index 9729f271d150..9742e5cb853a 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -297,6 +297,32 @@ out:
return 1;
}
+static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
+{
+ struct kmem_cache *c;
+ int i, iter, maxiter = 1024;
+ int num, bytes;
+ bool fail = false;
+ void *objects[10];
+
+ c = kmem_cache_create("test_cache", size, size, 0, NULL);
+ for (iter = 0; (iter < maxiter) && !fail; iter++) {
+ num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
+ objects);
+ for (i = 0; i < num; i++) {
+ bytes = count_nonzero_bytes(objects[i], size);
+ if (bytes)
+ fail = true;
+ fill_with_garbage(objects[i], size);
+ }
+
+ if (num)
+ kmem_cache_free_bulk(c, num, objects);
+ }
+ *total_failures += fail;
+ return 1;
+}
+
/*
* Test kmem_cache allocation by creating caches of different sizes, with and
* without constructors, with and without SLAB_TYPESAFE_BY_RCU.
@@ -318,6 +344,7 @@ static int __init test_kmemcache(int *total_failures)
num_tests += do_kmem_cache_size(size, ctor, rcu, zero,
&failures);
}
+ num_tests += do_kmem_cache_size_bulk(size, &failures);
}
REPORT_FAILURES_IN_FN();
*total_failures += failures;
diff --git a/mm/compaction.c b/mm/compaction.c
index ce08b39d85d4..672d3c78c6ab 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
/* Ensure the start of the pageblock or zone is online and valid */
block_pfn = pageblock_start_pfn(pfn);
- block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+ block_pfn = max(block_pfn, zone->zone_start_pfn);
+ block_page = pfn_to_online_page(block_pfn);
if (block_page) {
page = block_page;
pfn = block_pfn;
}
/* Ensure the end of the pageblock or zone is online and valid */
- block_pfn += pageblock_nr_pages;
+ block_pfn = pageblock_end_pfn(pfn) - 1;
block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
end_page = pfn_to_online_page(block_pfn);
if (!end_page)
@@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
page += (1 << PAGE_ALLOC_COSTLY_ORDER);
pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
- } while (page < end_page);
+ } while (page <= end_page);
return false;
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7ef849da8278..0ae72b6acee7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -199,7 +199,6 @@ struct to_kill {
struct task_struct *tsk;
unsigned long addr;
short size_shift;
- char addr_valid;
};
/*
@@ -324,22 +323,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
}
}
tk->addr = page_address_in_vma(p, vma);
- tk->addr_valid = 1;
if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(p, vma);
else
tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
/*
- * In theory we don't have to kill when the page was
- * munmaped. But it could be also a mremap. Since that's
- * likely very rare kill anyways just out of paranoia, but use
- * a SIGKILL because the error is not contained anymore.
+ * Send SIGKILL if "tk->addr == -EFAULT". Also, as
+ * "tk->size_shift" is always non-zero for !is_zone_device_page(),
+ * so "tk->size_shift == 0" effectively checks no mapping on
+ * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
+ * to a process' address space, it's possible not all N VMAs
+ * contain mappings for the page, but at least one VMA does.
+ * Only deliver SIGBUS with payload derived from the VMA that
+ * has a mapping for the page.
*/
- if (tk->addr == -EFAULT || tk->size_shift == 0) {
+ if (tk->addr == -EFAULT) {
pr_info("Memory failure: Unable to find user space address %lx in %s\n",
page_to_pfn(p), tsk->comm);
- tk->addr_valid = 0;
+ } else if (tk->size_shift == 0) {
+ kfree(tk);
+ return;
}
get_task_struct(tsk);
tk->tsk = tsk;
@@ -366,7 +370,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* make sure the process doesn't catch the
* signal and then access the memory. Just kill it.
*/
- if (fail || tk->addr_valid == 0) {
+ if (fail || tk->addr == -EFAULT) {
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
pfn, tk->tsk->comm, tk->tsk->pid);
do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c0b2e0306720..ecc3dbad606b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4473,12 +4473,14 @@ retry_cpuset:
if (page)
goto got_pg;
- if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
+ if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
* or is prohibited because it recently failed at this
- * order, fail immediately.
+ * order, fail immediately unless the allocator has
+ * requested compaction and reclaim retry.
*
* Reclaim is
* - potentially very expensive because zones are far
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 5f5769c7db3b..4ade843ff588 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -67,8 +67,9 @@ static struct page_ext_operations *page_ext_ops[] = {
#endif
};
+unsigned long page_ext_size = sizeof(struct page_ext);
+
static unsigned long total_usage;
-static unsigned long extra_mem;
static bool __init invoke_need_callbacks(void)
{
@@ -78,9 +79,8 @@ static bool __init invoke_need_callbacks(void)
for (i = 0; i < entries; i++) {
if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
- page_ext_ops[i]->offset = sizeof(struct page_ext) +
- extra_mem;
- extra_mem += page_ext_ops[i]->size;
+ page_ext_ops[i]->offset = page_ext_size;
+ page_ext_size += page_ext_ops[i]->size;
need = true;
}
}
@@ -99,14 +99,9 @@ static void __init invoke_init_callbacks(void)
}
}
-static unsigned long get_entry_size(void)
-{
- return sizeof(struct page_ext) + extra_mem;
-}
-
static inline struct page_ext *get_entry(void *base, unsigned long index)
{
- return base + get_entry_size() * index;
+ return base + page_ext_size * index;
}
#if !defined(CONFIG_SPARSEMEM)
@@ -156,7 +151,7 @@ static int __init alloc_node_page_ext(int nid)
!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
nr_pages += MAX_ORDER_NR_PAGES;
- table_size = get_entry_size() * nr_pages;
+ table_size = page_ext_size * nr_pages;
base = memblock_alloc_try_nid(
table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
@@ -234,7 +229,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
if (section->page_ext)
return 0;
- table_size = get_entry_size() * PAGES_PER_SECTION;
+ table_size = page_ext_size * PAGES_PER_SECTION;
base = alloc_page_ext(table_size, nid);
/*
@@ -254,7 +249,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
* we need to apply a mask.
*/
pfn &= PAGE_SECTION_MASK;
- section->page_ext = (void *)base - get_entry_size() * pfn;
+ section->page_ext = (void *)base - page_ext_size * pfn;
total_usage += table_size;
return 0;
}
@@ -267,7 +262,7 @@ static void free_page_ext(void *addr)
struct page *page = virt_to_page(addr);
size_t table_size;
- table_size = get_entry_size() * PAGES_PER_SECTION;
+ table_size = page_ext_size * PAGES_PER_SECTION;
BUG_ON(PageReserved(page));
kmemleak_free(addr);
diff --git a/mm/page_owner.c b/mm/page_owner.c
index dee931184788..e327bcd0380e 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -24,12 +24,10 @@ struct page_owner {
short last_migrate_reason;
gfp_t gfp_mask;
depot_stack_handle_t handle;
-#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t free_handle;
-#endif
};
-static bool page_owner_disabled = true;
+static bool page_owner_enabled = false;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
static depot_stack_handle_t dummy_handle;
@@ -44,7 +42,7 @@ static int __init early_page_owner_param(char *buf)
return -EINVAL;
if (strcmp(buf, "on") == 0)
- page_owner_disabled = false;
+ page_owner_enabled = true;
return 0;
}
@@ -52,10 +50,7 @@ early_param("page_owner", early_page_owner_param);
static bool need_page_owner(void)
{
- if (page_owner_disabled)
- return false;
-
- return true;
+ return page_owner_enabled;
}
static __always_inline depot_stack_handle_t create_dummy_stack(void)
@@ -84,7 +79,7 @@ static noinline void register_early_stack(void)
static void init_page_owner(void)
{
- if (page_owner_disabled)
+ if (!page_owner_enabled)
return;
register_dummy_stack();
@@ -148,25 +143,19 @@ void __reset_page_owner(struct page *page, unsigned int order)
{
int i;
struct page_ext *page_ext;
-#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t handle = 0;
struct page_owner *page_owner;
- if (debug_pagealloc_enabled())
- handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
-#endif
+ handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+ page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
for (i = 0; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
- if (unlikely(!page_ext))
- continue;
- __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled()) {
- page_owner = get_page_owner(page_ext);
- page_owner->free_handle = handle;
- }
-#endif
+ __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_owner = get_page_owner(page_ext);
+ page_owner->free_handle = handle;
+ page_ext = page_ext_next(page_ext);
}
}
@@ -184,9 +173,9 @@ static inline void __set_page_owner_handle(struct page *page,
page_owner->gfp_mask = gfp_mask;
page_owner->last_migrate_reason = -1;
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- page_ext = lookup_page_ext(page + i);
+ page_ext = page_ext_next(page_ext);
}
}
@@ -224,12 +213,10 @@ void __split_page_owner(struct page *page, unsigned int order)
if (unlikely(!page_ext))
return;
- page_owner = get_page_owner(page_ext);
- page_owner->order = 0;
- for (i = 1; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
+ for (i = 0; i < (1 << order); i++) {
page_owner = get_page_owner(page_ext);
page_owner->order = 0;
+ page_ext = page_ext_next(page_ext);
}
}
@@ -260,7 +247,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
* the new page, which will be freed.
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -320,7 +307,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
if (unlikely(!page_ext))
continue;
- if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
@@ -435,7 +422,7 @@ void __dump_page_owner(struct page *page)
return;
}
- if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
pr_alert("page_owner tracks the page as allocated\n");
else
pr_alert("page_owner tracks the page as freed\n");
@@ -451,7 +438,6 @@ void __dump_page_owner(struct page *page)
stack_trace_print(entries, nr_entries, 0);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
handle = READ_ONCE(page_owner->free_handle);
if (!handle) {
pr_alert("page_owner free stack trace missing\n");
@@ -460,7 +446,6 @@ void __dump_page_owner(struct page *page)
pr_alert("page last free stack trace:\n");
stack_trace_print(entries, nr_entries, 0);
}
-#endif
if (page_owner->last_migrate_reason != -1)
pr_alert("page has been migrated, last migrate reason: %s\n",
@@ -527,7 +512,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
* Although we do have the info about past allocation of free
* pages, it's not relevant for current memory usage.
*/
- if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
diff --git a/mm/slab.c b/mm/slab.c
index 9df370558e5d..66e5d8032bae 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4206,9 +4206,12 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
/**
* __ksize -- Uninstrumented ksize.
+ * @objp: pointer to the object
*
* Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
* safety checks as ksize() with KASAN instrumentation enabled.
+ *
+ * Return: size of the actual memory used by @objp in bytes
*/
size_t __ksize(const void *objp)
{
diff --git a/mm/slub.c b/mm/slub.c
index 3d63ae320d31..b25c807a111f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2672,6 +2672,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
}
/*
+ * If the object has been wiped upon free, make sure it's fully initialized by
+ * zeroing out freelist pointer.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+ void *obj)
+{
+ if (unlikely(slab_want_init_on_free(s)) && obj)
+ memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
+}
+
+/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* overhead for requests that can be satisfied on the fastpath.
@@ -2759,12 +2770,8 @@ redo:
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}
- /*
- * If the object has been wiped upon free, make sure it's fully
- * initialized by zeroing out freelist pointer.
- */
- if (unlikely(slab_want_init_on_free(s)) && object)
- memset(object + s->offset, 0, sizeof(void *));
+
+ maybe_wipe_obj_freeptr(s, object);
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
@@ -3178,10 +3185,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
goto error;
c = this_cpu_ptr(s->cpu_slab);
+ maybe_wipe_obj_freeptr(s, p[i]);
+
continue; /* goto for-loop */
}
c->freelist = get_freepointer(s, object);
p[i] = object;
+ maybe_wipe_obj_freeptr(s, p[i]);
}
c->tid = next_tid(c->tid);
local_irq_enable();
@@ -4846,7 +4856,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
}
}
- get_online_mems();
+ /*
+ * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
+ * already held which will conflict with an existing lock order:
+ *
+ * mem_hotplug_lock->slab_mutex->kernfs_mutex
+ *
+ * We don't really need mem_hotplug_lock (to hold off
+ * slab_mem_going_offline_callback) here because slab's memory hot
+ * unplug code doesn't destroy the kmem_cache->node[] data.
+ */
+
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
struct kmem_cache_node *n;
@@ -4887,7 +4907,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
#endif
- put_online_mems();
kfree(nodes);
return x + sprintf(buf + x, "\n");
}