summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt9
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c2
-rw-r--r--include/linux/mm.h24
-rw-r--r--mm/dmapool.c4
-rw-r--r--mm/page_alloc.c71
-rw-r--r--mm/slab.c16
-rw-r--r--mm/slab.h20
-rw-r--r--mm/slub.c40
-rw-r--r--net/core/sock.c2
-rw-r--r--security/Kconfig.hardening29
10 files changed, 199 insertions, 18 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aa4e7e7b87c2..099c5a4be95b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1668,6 +1668,15 @@
initrd= [BOOT] Specify the location of the initial ramdisk
+ init_on_alloc= [MM] Fill newly allocated pages and heap objects with
+ zeroes.
+ Format: 0 | 1
+ Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
+
+ init_on_free= [MM] Fill freed pages and heap objects with zeroes.
+ Format: 0 | 1
+ Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
+
init_pkru= [x86] Specify the default memory protection keys rights
register contents for all processes. 0x55555554 by
default (disallow access to all but pkey 0). Can
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 829b0c6944d8..61758201d9b2 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -127,7 +127,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
res = (void *)pbundle->internal_buffer + pbundle->internal_used;
pbundle->internal_used =
ALIGN(new_used, sizeof(*pbundle->internal_buffer));
- if (flags & __GFP_ZERO)
+ if (want_init_on_alloc(flags))
memset(res, 0, size);
return res;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bb242ad810eb..f88f0eabcc5e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2700,6 +2700,30 @@ static inline void kernel_poison_pages(struct page *page, int numpages,
int enable) { }
#endif
+#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON
+DECLARE_STATIC_KEY_TRUE(init_on_alloc);
+#else
+DECLARE_STATIC_KEY_FALSE(init_on_alloc);
+#endif
+static inline bool want_init_on_alloc(gfp_t flags)
+{
+ if (static_branch_unlikely(&init_on_alloc) &&
+ !page_poisoning_enabled())
+ return true;
+ return flags & __GFP_ZERO;
+}
+
+#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON
+DECLARE_STATIC_KEY_TRUE(init_on_free);
+#else
+DECLARE_STATIC_KEY_FALSE(init_on_free);
+#endif
+static inline bool want_init_on_free(void)
+{
+ return static_branch_unlikely(&init_on_free) &&
+ !page_poisoning_enabled();
+}
+
#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
DECLARE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
#else
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 8c94c89a6f7e..fe5d33060415 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -378,7 +378,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
#endif
spin_unlock_irqrestore(&pool->lock, flags);
- if (mem_flags & __GFP_ZERO)
+ if (want_init_on_alloc(mem_flags))
memset(retval, 0, pool->size);
return retval;
@@ -428,6 +428,8 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
}
offset = vaddr - page->vaddr;
+ if (want_init_on_free())
+ memset(vaddr, 0, pool->size);
#ifdef DMAPOOL_DEBUG
if ((dma - page->dma) != offset) {
spin_unlock_irqrestore(&pool->lock, flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3a555ce69006..dbd0d5cbbcbb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -135,6 +135,55 @@ unsigned long totalcma_pages __read_mostly;
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
+#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON
+DEFINE_STATIC_KEY_TRUE(init_on_alloc);
+#else
+DEFINE_STATIC_KEY_FALSE(init_on_alloc);
+#endif
+EXPORT_SYMBOL(init_on_alloc);
+
+#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON
+DEFINE_STATIC_KEY_TRUE(init_on_free);
+#else
+DEFINE_STATIC_KEY_FALSE(init_on_free);
+#endif
+EXPORT_SYMBOL(init_on_free);
+
+static int __init early_init_on_alloc(char *buf)
+{
+ int ret;
+ bool bool_result;
+
+ if (!buf)
+ return -EINVAL;
+ ret = kstrtobool(buf, &bool_result);
+ if (bool_result && page_poisoning_enabled())
+ pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n");
+ if (bool_result)
+ static_branch_enable(&init_on_alloc);
+ else
+ static_branch_disable(&init_on_alloc);
+ return ret;
+}
+early_param("init_on_alloc", early_init_on_alloc);
+
+static int __init early_init_on_free(char *buf)
+{
+ int ret;
+ bool bool_result;
+
+ if (!buf)
+ return -EINVAL;
+ ret = kstrtobool(buf, &bool_result);
+ if (bool_result && page_poisoning_enabled())
+ pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n");
+ if (bool_result)
+ static_branch_enable(&init_on_free);
+ else
+ static_branch_disable(&init_on_free);
+ return ret;
+}
+early_param("init_on_free", early_init_on_free);
/*
* A cached value of the page's pageblock's migratetype, used when the page is
@@ -1067,6 +1116,14 @@ out:
return ret;
}
+static void kernel_init_free_pages(struct page *page, int numpages)
+{
+ int i;
+
+ for (i = 0; i < numpages; i++)
+ clear_highpage(page + i);
+}
+
static __always_inline bool free_pages_prepare(struct page *page,
unsigned int order, bool check_free)
{
@@ -1118,6 +1175,9 @@ static __always_inline bool free_pages_prepare(struct page *page,
PAGE_SIZE << order);
}
arch_free_page(page, order);
+ if (want_init_on_free())
+ kernel_init_free_pages(page, 1 << order);
+
kernel_poison_pages(page, 1 << order, 0);
if (debug_pagealloc_enabled())
kernel_map_pages(page, 1 << order, 0);
@@ -2019,8 +2079,8 @@ static inline int check_new_page(struct page *page)
static inline bool free_pages_prezeroed(void)
{
- return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
- page_poisoning_enabled();
+ return (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
+ page_poisoning_enabled()) || want_init_on_free();
}
#ifdef CONFIG_DEBUG_VM
@@ -2090,13 +2150,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
unsigned int alloc_flags)
{
- int i;
-
post_alloc_hook(page, order, gfp_flags);
- if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
- for (i = 0; i < (1 << order); i++)
- clear_highpage(page + i);
+ if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags))
+ kernel_init_free_pages(page, 1 << order);
if (order && (gfp_flags & __GFP_COMP))
prep_compound_page(page, order);
diff --git a/mm/slab.c b/mm/slab.c
index e9d90b0da47b..9df370558e5d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1811,6 +1811,14 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
cachep->num = 0;
+ /*
+ * If slab auto-initialization on free is enabled, store the freelist
+ * off-slab, so that its contents don't end up in one of the allocated
+ * objects.
+ */
+ if (unlikely(slab_want_init_on_free(cachep)))
+ return false;
+
if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
return false;
@@ -3248,7 +3256,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
local_irq_restore(save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
- if (unlikely(flags & __GFP_ZERO) && ptr)
+ if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr)
memset(ptr, 0, cachep->object_size);
slab_post_alloc_hook(cachep, flags, 1, &ptr);
@@ -3305,7 +3313,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
prefetchw(objp);
- if (unlikely(flags & __GFP_ZERO) && objp)
+ if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp)
memset(objp, 0, cachep->object_size);
slab_post_alloc_hook(cachep, flags, 1, &objp);
@@ -3426,6 +3434,8 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
+ if (unlikely(slab_want_init_on_free(cachep)))
+ memset(objp, 0, cachep->object_size);
kmemleak_free_recursive(objp, cachep->flags);
objp = cache_free_debugcheck(cachep, objp, caller);
@@ -3513,7 +3523,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
/* Clear memory outside IRQ disabled section */
- if (unlikely(flags & __GFP_ZERO))
+ if (unlikely(slab_want_init_on_alloc(flags, s)))
for (i = 0; i < size; i++)
memset(p[i], 0, s->object_size);
diff --git a/mm/slab.h b/mm/slab.h
index a62372d0f271..9057b8056b07 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -607,4 +607,24 @@ static inline int cache_random_seq_create(struct kmem_cache *cachep,
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
+static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
+{
+ if (static_branch_unlikely(&init_on_alloc)) {
+ if (c->ctor)
+ return false;
+ if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
+ return flags & __GFP_ZERO;
+ return true;
+ }
+ return flags & __GFP_ZERO;
+}
+
+static inline bool slab_want_init_on_free(struct kmem_cache *c)
+{
+ if (static_branch_unlikely(&init_on_free))
+ return !(c->ctor ||
+ (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
+ return false;
+}
+
#endif /* MM_SLAB_H */
diff --git a/mm/slub.c b/mm/slub.c
index c9541a480627..e6c030e47364 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1279,6 +1279,10 @@ check_slabs:
if (*str == ',')
slub_debug_slabs = str + 1;
out:
+ if ((static_branch_unlikely(&init_on_alloc) ||
+ static_branch_unlikely(&init_on_free)) &&
+ (slub_debug & SLAB_POISON))
+ pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
return 1;
}
@@ -1422,6 +1426,28 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
static inline bool slab_free_freelist_hook(struct kmem_cache *s,
void **head, void **tail)
{
+
+ void *object;
+ void *next = *head;
+ void *old_tail = *tail ? *tail : *head;
+ int rsize;
+
+ if (slab_want_init_on_free(s))
+ do {
+ object = next;
+ next = get_freepointer(s, object);
+ /*
+ * Clear the object and the metadata, but don't touch
+ * the redzone.
+ */
+ memset(object, 0, s->object_size);
+ rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
+ : 0;
+ memset((char *)object + s->inuse, 0,
+ s->size - s->inuse - rsize);
+ set_freepointer(s, object, next);
+ } while (object != old_tail);
+
/*
* Compiler cannot detect this function can be removed if slab_free_hook()
* evaluates to nothing. Thus, catch all relevant config debug options here.
@@ -1431,9 +1457,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
defined(CONFIG_DEBUG_OBJECTS_FREE) || \
defined(CONFIG_KASAN)
- void *object;
- void *next = *head;
- void *old_tail = *tail ? *tail : *head;
+ next = *head;
/* Head and tail of the reconstructed freelist */
*head = NULL;
@@ -2729,8 +2753,14 @@ redo:
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}
+ /*
+ * If the object has been wiped upon free, make sure it's fully
+ * initialized by zeroing out freelist pointer.
+ */
+ if (unlikely(slab_want_init_on_free(s)) && object)
+ memset(object + s->offset, 0, sizeof(void *));
- if (unlikely(gfpflags & __GFP_ZERO) && object)
+ if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
slab_post_alloc_hook(s, gfpflags, 1, &object);
@@ -3151,7 +3181,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
local_irq_enable();
/* Clear memory outside IRQ disabled fastpath loop */
- if (unlikely(flags & __GFP_ZERO)) {
+ if (unlikely(slab_want_init_on_alloc(flags, s))) {
int j;
for (j = 0; j < i; j++)
diff --git a/net/core/sock.c b/net/core/sock.c
index 3e073ca6138f..d57b0cc995a0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1597,7 +1597,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (!sk)
return sk;
- if (priority & __GFP_ZERO)
+ if (want_init_on_alloc(priority))
sk_prot_clear_nulls(sk, prot->obj_size);
} else
sk = kmalloc(prot->obj_size, priority);
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index c6cb2d9b2905..a1ffe2eb4d5f 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -160,6 +160,35 @@ config STACKLEAK_RUNTIME_DISABLE
runtime to control kernel stack erasing for kernels built with
CONFIG_GCC_PLUGIN_STACKLEAK.
+config INIT_ON_ALLOC_DEFAULT_ON
+ bool "Enable heap memory zeroing on allocation by default"
+ help
+ This has the effect of setting "init_on_alloc=1" on the kernel
+ command line. This can be disabled with "init_on_alloc=0".
+ When "init_on_alloc" is enabled, all page allocator and slab
+ allocator memory will be zeroed when allocated, eliminating
+ many kinds of "uninitialized heap memory" flaws, especially
+ heap content exposures. The performance impact varies by
+ workload, but most cases see <1% impact. Some synthetic
+ workloads have measured as high as 7%.
+
+config INIT_ON_FREE_DEFAULT_ON
+ bool "Enable heap memory zeroing on free by default"
+ help
+ This has the effect of setting "init_on_free=1" on the kernel
+ command line. This can be disabled with "init_on_free=0".
+ Similar to "init_on_alloc", when "init_on_free" is enabled,
+ all page allocator and slab allocator memory will be zeroed
+ when freed, eliminating many kinds of "uninitialized heap memory"
+ flaws, especially heap content exposures. The primary difference
+ with "init_on_free" is that data lifetime in memory is reduced,
+ as anything freed is wiped immediately, making live forensics or
+ cold boot memory attacks unable to recover freed memory contents.
+ The performance impact varies by workload, but is more expensive
+ than "init_on_alloc" due to the negative cache effects of
+ touching "cold" memory areas. Most cases see 3-5% impact. Some
+ synthetic workloads have measured as high as 8%.
+
endmenu
endmenu