From 1db491f77b6ed0f32f1d4a3ac40a5be9524f1914 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 15 Jan 2015 20:30:01 -0800 Subject: x86/mm: Reduce PAE-mode per task pgd allocation overhead from 4K to 32 bytes With more embedded systems emerging using Quark, among other things, the 32-bit kernel matters again. A 32-bit machine and kernel use PAE paging, which currently wastes at least 4K of memory per process on Linux, because we have to reserve an entire page to hold a single 32-byte PGD structure. It would be a very good thing if we could eliminate that wastage. PAE paging is used to access more than 4GB of memory on x86-32, and it is required for NX. In this patch, we still allocate one page for the pgd for a Xen domain and for 64-bit kernels, because a one-page pgd is assumed in these cases. But we can save memory by allocating only a 32-byte pgd for a 32-bit PAE kernel when it is not running as a Xen domain. Signed-off-by: Fenghua Yu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Christoph Lameter Cc: Dave Hansen Cc: Glenn Williamson Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1421382601-46912-1-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pgtable.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 6fb6927f9e76..d223e1fe1dd2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -271,12 +271,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) } } +/* + * Xen paravirt assumes the pgd table should be in one page. A 64-bit kernel + * also assumes that the pgd should be in one page. + * + * But a kernel with PAE paging that is not running as a Xen domain + * only needs to allocate 32 bytes for the pgd instead of one page. + */ +#ifdef CONFIG_X86_PAE + +#include + +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) +#define PGD_ALIGN 32 + +static struct kmem_cache *pgd_cache; + +static int __init pgd_cache_init(void) +{ + /* + * When a PAE kernel is running as a Xen domain, it does not use a + * shared kernel pmd, and this requires a whole page for the pgd. + */ + if (!SHARED_KERNEL_PMD) + return 0; + + /* + * When a PAE kernel is not running as a Xen domain, it uses a + * shared kernel pmd, which does not require a whole + * page for the pgd. We are able to just allocate 32 bytes for the pgd. + * During boot, we create a 32-byte slab for pgd table allocation. + */ + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, + SLAB_PANIC, NULL); + if (!pgd_cache) + return -ENOMEM; + + return 0; +} +core_initcall(pgd_cache_init); + +static inline pgd_t *_pgd_alloc(void) +{ + /* + * If no SHARED_KERNEL_PMD, the PAE kernel is running as a Xen domain, + * and we allocate one page for the pgd. + */ + if (!SHARED_KERNEL_PMD) + return (pgd_t *)__get_free_page(PGALLOC_GFP); + + /* + * Now the PAE kernel is not running as a Xen domain. We can allocate + * a 32-byte slab object for the pgd to save memory.
+ */ + return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); +} + +static inline void _pgd_free(pgd_t *pgd) +{ + if (!SHARED_KERNEL_PMD) + free_page((unsigned long)pgd); + else + kmem_cache_free(pgd_cache, pgd); +} +#else +static inline pgd_t *_pgd_alloc(void) +{ + return (pgd_t *)__get_free_page(PGALLOC_GFP); +} + +static inline void _pgd_free(pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} +#endif /* CONFIG_X86_PAE */ + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); + pgd = _pgd_alloc(); if (pgd == NULL) goto out; @@ -306,7 +381,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) out_free_pmds: free_pmds(pmds); out_free_pgd: - free_page((unsigned long)pgd); + _pgd_free(pgd); out: return NULL; } @@ -316,7 +391,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); - free_page((unsigned long)pgd); + _pgd_free(pgd); } /* -- cgit v1.2.3 From 1f40a8bfa91adfa8d1ac5013c08c76f6fd6691ad Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sun, 28 Dec 2014 17:15:24 +0100 Subject: x86/mm/pat: Ensure different messages in STRICT_DEVMEM and PAT cases STRICT_DEVMEM and PAT produce the same failure when accessing /dev/mem, which is quite confusing to the user. Make the printk messages different to lessen confusion. Signed-off-by: Pavel Machek Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7ac68698406c..35af6771a95a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } #ifdef CONFIG_STRICT_DEVMEM -/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ +/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n", - current->comm, from, to - 1); + printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", + current->comm, from, to - 1); return 0; } cursor += PAGE_SIZE; -- cgit v1.2.3 From 8d4a40bc0651ea51c196a3d3016d041c41ec19a2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 24 Feb 2015 10:13:28 +0100 Subject: x86/mm: Use early_memunmap() instead of early_iounmap() Memory mapped via early_memremap() should be unmapped with early_memunmap() instead of early_iounmap().
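The intended pairing looks like this; a minimal sketch only, where the parse_boot_blob() helper and its arguments are illustrative and not code from this patch:

	#include <linux/io.h>	/* early_memremap()/early_memunmap() on x86 */

	static void __init parse_boot_blob(u64 phys, u32 len)
	{
		/* Temporarily map a blob of ordinary RAM during early boot. */
		void *p = early_memremap(phys, len);

		if (!p)
			return;
		/* ... read the blob through the plain pointer p ... */
		early_memunmap(p, len);		/* pairs with early_memremap() */
	}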
Signed-off-by: Juergen Gross Cc: matt.fleming@intel.com Link: http://lkml.kernel.org/r/1424769211-11378-2-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/devicetree.c | 4 ++-- arch/x86/kernel/e820.c | 2 +- arch/x86/kernel/setup.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3d3503351242..6367a780cc8c 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -286,13 +286,13 @@ static void __init x86_flattree_get_config(void) initial_boot_params = dt = early_memremap(initial_dtb, map_len); size = of_get_flat_dt_size(); if (map_len < size) { - early_iounmap(dt, map_len); + early_memunmap(dt, map_len); initial_boot_params = dt = early_memremap(initial_dtb, size); map_len = size; } unflatten_and_copy_device_tree(); - early_iounmap(dt, map_len); + early_memunmap(dt, map_len); } #else static inline void x86_flattree_get_config(void) { } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 46201deee923..7d46bb260334 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -661,7 +661,7 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len) extmap = (struct e820entry *)(sdata->data); __append_e820_map(extmap, entries); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); - early_iounmap(sdata, data_len); + early_memunmap(sdata, data_len); printk(KERN_INFO "e820: extended physical RAM map:\n"); e820_print_map("extended"); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 98dc9317286e..733864a653ab 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -356,7 +356,7 @@ static void __init relocate_initrd(void) mapaddr = ramdisk_image & PAGE_MASK; p = early_memremap(mapaddr, clen+slop); memcpy(q, p+slop, clen); - early_iounmap(p, clen+slop); + early_memunmap(p, clen+slop); q += clen; ramdisk_image += clen; ramdisk_size -= clen; @@ -445,7 +445,7 @@ static void __init parse_setup_data(void) data_len = data->len + sizeof(struct setup_data); data_type = data->type; pa_next = data->next; - early_iounmap(data, sizeof(*data)); + early_memunmap(data, sizeof(*data)); switch (data_type) { case SETUP_E820_EXT: @@ -480,7 +480,7 @@ static void __init e820_reserve_setup_data(void) E820_RAM, E820_RESERVED_KERN); found = 1; pa_data = data->next; - early_iounmap(data, sizeof(*data)); + early_memunmap(data, sizeof(*data)); } if (!found) return; @@ -501,7 +501,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) data = early_memremap(pa_data, sizeof(*data)); memblock_reserve(pa_data, sizeof(*data) + data->len); pa_data = data->next; - early_iounmap(data, sizeof(*data)); + early_memunmap(data, sizeof(*data)); } } -- cgit v1.2.3 From 954e12f7a800ce38b4722ca1d7a6d0293d377b55 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 24 Feb 2015 10:13:31 +0100 Subject: x86/mm, efi: Use early_ioremap() in arch/x86/platform/efi/efi-bgrt.c Use early_ioremap() to map an I/O-area instead of early_memremap(). 
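The distinction matters for the backing type of the memory and for sparse annotations: early_memremap() returns a plain pointer for normal RAM, while early_ioremap() returns an __iomem pointer for device/I/O areas. A hedged sketch (the function name and parameters are illustrative, not from this patch):

	#include <linux/io.h>

	static void __init peek_io_area(resource_size_t phys, unsigned long len)
	{
		void __iomem *p = early_ioremap(phys, len);	/* I/O area */

		if (!p)
			return;
		/* ... access through readb()/memcpy_fromio(), not plain loads ... */
		early_iounmap(p, len);		/* pairs with early_ioremap() */
	}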
Signed-off-by: Juergen Gross Cc: matt.fleming@intel.com Link: http://lkml.kernel.org/r/1424769211-11378-5-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi-bgrt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index d143d216d52b..d7f997f7c26d 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -67,7 +67,7 @@ void __init efi_bgrt_init(void) image = efi_lookup_mapped_addr(bgrt_tab->image_address); if (!image) { - image = early_memremap(bgrt_tab->image_address, + image = early_ioremap(bgrt_tab->image_address, sizeof(bmp_header)); ioremapped = true; if (!image) { @@ -89,7 +89,7 @@ void __init efi_bgrt_init(void) } if (ioremapped) { - image = early_memremap(bgrt_tab->image_address, + image = early_ioremap(bgrt_tab->image_address, bmp_header.size); if (!image) { pr_err("Ignoring BGRT: failed to map image memory\n"); -- cgit v1.2.3 From 6bbb614ec478961c7443086bdf7fd6784479c14a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 27 Feb 2015 15:55:40 +0100 Subject: x86/mm: Unexport set_memory_ro() and set_memory_rw() This effectively unexports the set_memory_ro() and set_memory_rw() functions, and thus reverts: a03352d2c1dc ("x86: export set_memory_ro and set_memory_rw"). They were introduced for debugging purposes in e1000e, but no module user is left in the mainline kernel, and we explicitly do not want modules to use these functions, as they e.g. protect eBPF (interpreted & JIT'ed) images from malicious modifications or bugs. Beyond the eBPF use case, I believe other set_memory_*() functions should be unexported on x86 for modules as well. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Bruce Allan Cc: H. Peter Anvin Cc: Jesse Brandeburg Cc: Thomas Gleixner Cc: davem@davemloft.net Link: http://lkml.kernel.org/r/a064393a0a5d319eebde5c761cfd743132d4f213.1425040940.git.daniel@iogearbox.net Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 536ea2fb6e33..81e8282d8c2f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1654,13 +1654,11 @@ int set_memory_ro(unsigned long addr, int numpages) { return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); } -EXPORT_SYMBOL_GPL(set_memory_ro); int set_memory_rw(unsigned long addr, int numpages) { return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); } -EXPORT_SYMBOL_GPL(set_memory_rw); int set_memory_np(unsigned long addr, int numpages) { -- cgit v1.2.3 From d9fd579c218e22c897f0f1b9e132af9b436cf445 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 4 Mar 2015 17:24:11 -0800 Subject: x86/mm: Use IS_ENABLED() for direct_gbpages Replace the #ifdef eyesore with IS_ENABLED() use. Signed-off-by: Luis R. Rodriguez Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: David Vrabel Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: H.
Peter Anvin Cc: JBeulich@suse.com Cc: Jan Beulich Cc: Joonsoo Kim Cc: Juergen Gross Cc: Linus Torvalds Cc: Pavel Machek Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Toshi Kani Cc: Vlastimil Babka Cc: Xishi Qiu Cc: julia.lawall@lip6.fr Link: http://lkml.kernel.org/r/1425518654-3403-2-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a110efca6d06..74f2b37fd073 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -131,11 +131,7 @@ void __init early_alloc_pgt_buf(void) int after_bootmem; -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; +int direct_gbpages = IS_ENABLED(CONFIG_DIRECT_GBPAGES); static void __init init_gbpages(void) { -- cgit v1.2.3 From e5008abe929c160d36e44b8c2b644d4330d2e389 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 4 Mar 2015 17:24:12 -0800 Subject: x86/mm: Simplify enabling direct_gbpages direct_gbpages can be force-enabled via an early parameter, yet not actually take effect when DEBUG_PAGEALLOC or KMEMCHECK is enabled. You can also enable direct_gbpages right now on an x86_64 build even if your CPU has no support for the feature. In both cases PG_LEVEL_1G won't actually be enabled, but direct_gbpages is used in other areas under the assumption that PG_LEVEL_1G was set. Fix this by collecting all the requirements that make this feature sensible to enable into the new ENABLE_DIRECT_GBPAGES Kconfig symbol, and only flipping on PG_LEVEL_1G (and leaving it set) when all of them are met; the feature can then only be enabled on sensible builds as defined by ENABLE_DIRECT_GBPAGES. If the CPU supports it, you can enable this either through the DIRECT_GBPAGES option or through the early kernel parameter; likewise, on a capable platform you can always force-disable it. Signed-off-by: Luis R. Rodriguez Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: David Vrabel Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: JBeulich@suse.com Cc: Jan Beulich Cc: Joonsoo Kim Cc: Juergen Gross Cc: Linus Torvalds Cc: Pavel Machek Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Toshi Kani Cc: Vlastimil Babka Cc: Xishi Qiu Cc: julia.lawall@lip6.fr Link: http://lkml.kernel.org/r/1425518654-3403-3-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 18 +++++++++++++----- arch/x86/mm/init.c | 17 +++++++++-------- arch/x86/mm/pageattr.c | 2 -- 3 files changed, 22 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c2fb8a87dccb..4d06e1c8294a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1299,14 +1299,22 @@ config ARCH_DMA_ADDR_T_64BIT def_bool y depends on X86_64 || HIGHMEM64G +config ENABLE_DIRECT_GBPAGES + def_bool y + depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK + config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EXPERT default y - depends on X86_64 - ---help--- - Allow the kernel linear mapping to use 1GB pages on CPUs that - support it. This can improve the kernel's performance a tiny bit by - reducing TLB pressure. If in doubt, say "Y". + depends on ENABLE_DIRECT_GBPAGES + ---help--- + Enable by default the kernel linear mapping to use 1GB pages on CPUs + that support it. This can improve the kernel's performance a tiny bit + by reducing TLB pressure.
If in doubt, say "Y". If you've disabled + option but your platform is capable of handling support for this + you can use the gbpages kernel parameter. Likewise if you've enabled + this but you'd like to force disable this option you can use the + nogbpages kernel parameter. # Common NUMA Features config NUMA diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 74f2b37fd073..2ce2c8e8c99c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -131,16 +131,21 @@ void __init early_alloc_pgt_buf(void) int after_bootmem; +static int page_size_mask; + int direct_gbpages = IS_ENABLED(CONFIG_DIRECT_GBPAGES); static void __init init_gbpages(void) { -#ifdef CONFIG_X86_64 - if (direct_gbpages && cpu_has_gbpages) + if (!IS_ENABLED(CONFIG_ENABLE_DIRECT_GBPAGES)) { + direct_gbpages = 0; + return; + } + if (direct_gbpages && cpu_has_gbpages) { printk(KERN_INFO "Using GB pages for direct mapping\n"); - else + page_size_mask |= 1 << PG_LEVEL_1G; + } else direct_gbpages = 0; -#endif } struct map_range { @@ -149,8 +154,6 @@ struct map_range { unsigned page_size_mask; }; -static int page_size_mask; - static void __init probe_page_size_mask(void) { init_gbpages(); @@ -161,8 +164,6 @@ static void __init probe_page_size_mask(void) * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (direct_gbpages) - page_size_mask |= 1 << PG_LEVEL_1G; if (cpu_has_pse) page_size_mask |= 1 << PG_LEVEL_2M; #endif diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 81e8282d8c2f..89af288ec674 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m) seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif -#ifdef CONFIG_X86_64 if (direct_gbpages) seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); -#endif } #else static inline void split_page_count(int level) { } -- cgit v1.2.3 From 73c8c861dc5bddf1b24c6aeffee2292c96cf8db2 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 4 Mar 2015 17:24:14 -0800 Subject: x86/mm: Use early_param_on_off() for direct_gbpages The enabler/disabler is pretty simple: just use the provided wrappers. This lets us easily relate the variable to the associated Kconfig entry. Signed-off-by: Luis R. Rodriguez Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: David Vrabel Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: H.
Peter Anvin Cc: JBeulich@suse.com Cc: Jan Beulich Cc: Joonsoo Kim Cc: Juergen Gross Cc: Linus Torvalds Cc: Pavel Machek Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Toshi Kani Cc: Vlastimil Babka Cc: Xishi Qiu Cc: julia.lawall@lip6.fr Link: http://lkml.kernel.org/r/1425518654-3403-5-git-send-email-mcgrof@do-not-panic.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 3 ++- arch/x86/mm/init_64.c | 14 -------------- 2 files changed, 2 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 2ce2c8e8c99c..c35ba8bce7cb 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -133,7 +133,8 @@ int after_bootmem; static int page_size_mask; -int direct_gbpages = IS_ENABLED(CONFIG_DIRECT_GBPAGES); +early_param_on_off("gbpages", "nogbpages", + direct_gbpages, CONFIG_DIRECT_GBPAGES); static void __init init_gbpages(void) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 30eb05ae7061..3fba623e3ba5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, return 0; } -static int __init parse_direct_gbpages_off(char *arg) -{ - direct_gbpages = 0; - return 0; -} -early_param("nogbpages", parse_direct_gbpages_off); - -static int __init parse_direct_gbpages_on(char *arg) -{ - direct_gbpages = 1; - return 0; -} -early_param("gbpages", parse_direct_gbpages_on); - /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the * physical space so we can cache the place of the first one and move -- cgit v1.2.3 From 10971ab269bbf22120edac95fcfa3c873a549bea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2015 08:18:23 +0100 Subject: x86/mm: Further simplify 1 GB kernel linear mappings handling It's a bit pointless to allow Kconfig configuration for 1GB kernel mappings, it's already hidden behind a 'default y' and CONFIG_EXPERT. Remove this complication and simplify the code by renaming CONFIG_ENABLE_DIRECT_GBPAGES to CONFIG_X86_DIRECT_GBPAGES and document the DEBUG_PAGE_ALLOC and KMEMCHECK quirks. Cc: Luis R. Rodriguez Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: David Vrabel Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: JBeulich@suse.com Cc: Jan Beulich Cc: Joonsoo Kim Cc: Juergen Gross Cc: Linus Torvalds Cc: Pavel Machek Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Toshi Kani Cc: Vlastimil Babka Cc: Xishi Qiu Cc: julia.lawall@lip6.fr Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 20 ++++++-------------- arch/x86/mm/init.c | 7 +------ 2 files changed, 7 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4d06e1c8294a..d03847513b6d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1299,22 +1299,14 @@ config ARCH_DMA_ADDR_T_64BIT def_bool y depends on X86_64 || HIGHMEM64G -config ENABLE_DIRECT_GBPAGES +config X86_DIRECT_GBPAGES def_bool y depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK - -config DIRECT_GBPAGES - bool "Enable 1GB pages for kernel pagetables" if EXPERT - default y - depends on ENABLE_DIRECT_GBPAGES - ---help--- - Enable by default the kernel linear mapping to use 1GB pages on CPUs - that support it. This can improve the kernel's performance a tiny bit - by reducing TLB pressure. If in doubt, say "Y". If you've disabled - option but your platform is capable of handling support for this - you can use the gbpages kernel parameter. 
Likewise if you've enabled - this but you'd like to force disable this option you can use the - nogbpages kernel parameter. + ---help--- + Certain kernel features effectively disable kernel + linear 1 GB mappings (even if the CPU otherwise + supports them), so don't confuse the user by printing + that we have them enabled. # Common NUMA Features config NUMA diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c35ba8bce7cb..8704153f2675 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -133,15 +133,10 @@ int after_bootmem; static int page_size_mask; -early_param_on_off("gbpages", "nogbpages", - direct_gbpages, CONFIG_DIRECT_GBPAGES); +early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES); static void __init init_gbpages(void) { - if (!IS_ENABLED(CONFIG_ENABLE_DIRECT_GBPAGES)) { - direct_gbpages = 0; - return; - } if (direct_gbpages && cpu_has_gbpages) { printk(KERN_INFO "Using GB pages for direct mapping\n"); page_size_mask |= 1 << PG_LEVEL_1G; -- cgit v1.2.3 From e61980a70245715ab39cbee2b9d6e6afc1ec37d4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2015 08:25:01 +0100 Subject: x86/mm: Simplify probe_page_size_mask() Now that we've simplified the gbpages config space, move the 'page_size_mask' initialization into probe_page_size_mask(), right next to the PSE and PGE enablement lines. Cc: Luis R. Rodriguez Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: David Vrabel Cc: Dexuan Cui Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: JBeulich@suse.com Cc: Jan Beulich Cc: Joonsoo Kim Cc: Juergen Gross Cc: Linus Torvalds Cc: Pavel Machek Cc: Thomas Gleixner Cc: Tony Lindgren Cc: Toshi Kani Cc: Vlastimil Babka Cc: Xishi Qiu Cc: julia.lawall@lip6.fr Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8704153f2675..6dc85d51cd98 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -131,29 +131,18 @@ void __init early_alloc_pgt_buf(void) int after_bootmem; -static int page_size_mask; - early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES); -static void __init init_gbpages(void) -{ - if (direct_gbpages && cpu_has_gbpages) { - printk(KERN_INFO "Using GB pages for direct mapping\n"); - page_size_mask |= 1 << PG_LEVEL_1G; - } else - direct_gbpages = 0; -} - struct map_range { unsigned long start; unsigned long end; unsigned page_size_mask; }; +static int page_size_mask; + static void __init probe_page_size_mask(void) { - init_gbpages(); - #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. 
@@ -173,6 +162,14 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } + + /* Enable 1 GB linear kernel mappings if available: */ + if (direct_gbpages && cpu_has_gbpages) { + printk(KERN_INFO "Using GB pages for direct mapping\n"); + page_size_mask |= 1 << PG_LEVEL_1G; + } else { + direct_gbpages = 0; + } } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From c709feda56886c38af3116254f84cbe6a78b3a5d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2015 08:58:44 +0100 Subject: x86/mm/pat: Initialize __cachemode2pte_tbl[] and __pte2cachemode_tbl[] in a bit more readable fashion The initialization of these two arrays is a bit difficult to follow: restructure it optically so that a 2D structure shows which bit in the PTE is set and which not. Also improve on comments a bit. No code or data changed: # arch/x86/mm/init.o: text data bss dec hex filename 4585 424 29776 34785 87e1 init.o.before 4585 424 29776 34785 87e1 init.o.after md5: a82e11ff58bcfd0af3a94662a701f65d init.o.before.asm a82e11ff58bcfd0af3a94662a701f65d init.o.after.asm Reviewed-by: Juergen Gross Cc: Andy Lutomirski Cc: Dave Hansen Cc: Jan Beulich Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Toshi Kani Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20150305082135.GB5969@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6dc85d51cd98..4469563f8c3b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -29,29 +29,33 @@ /* * Tables translating between page_cache_type_t and pte encoding. - * Minimal supported modes are defined statically, modified if more supported - * cache modes are available. - * Index into __cachemode2pte_tbl is the cachemode. - * Index into __pte2cachemode_tbl are the caching attribute bits of the pte - * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. + * + * Minimal supported modes are defined statically, they are modified + * during bootup if more supported cache modes are available. + * + * Index into __cachemode2pte_tbl[] is the cachemode. + * + * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. 
*/ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { - [_PAGE_CACHE_MODE_WB] = 0, - [_PAGE_CACHE_MODE_WC] = _PAGE_PWT, - [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD, - [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT, - [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, - [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, + [_PAGE_CACHE_MODE_WB ] = 0 | 0 , + [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , + [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, + [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD, }; EXPORT_SYMBOL(__cachemode2pte_tbl); + uint8_t __pte2cachemode_tbl[8] = { - [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, - [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS, - [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC, - [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, - [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, + [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; EXPORT_SYMBOL(__pte2cachemode_tbl); -- cgit v1.2.3 From 4e26d11f52684dc8b1632a8cfe450cb5197a8464 Mon Sep 17 00:00:00 2001 From: Hector Marco-Gisbert Date: Fri, 27 Mar 2015 12:38:21 +0100 Subject: x86/mm: Improve AMD Bulldozer ASLR workaround The ASLR implementation needs to special-case AMD F15h processors by clearing out bits [14:12] of the virtual address in order to avoid I$ cross invalidations and thus performance penalty for certain workloads. For details, see: dfb09f9b7ab0 ("x86, amd: Avoid cache aliasing penalties on AMD family 15h") This special case reduces the mmapped file's entropy by 3 bits. The following output is the run on an AMD Opteron 62xx class CPU processor under x86_64 Linux 4.0.0: $ for i in `seq 1 10`; do cat /proc/self/maps | grep "r-xp.*libc" ; done b7588000-b7736000 r-xp 00000000 00:01 4924 /lib/i386-linux-gnu/libc.so.6 b7570000-b771e000 r-xp 00000000 00:01 4924 /lib/i386-linux-gnu/libc.so.6 b75d0000-b777e000 r-xp 00000000 00:01 4924 /lib/i386-linux-gnu/libc.so.6 b75b0000-b775e000 r-xp 00000000 00:01 4924 /lib/i386-linux-gnu/libc.so.6 b7578000-b7726000 r-xp 00000000 00:01 4924 /lib/i386-linux-gnu/libc.so.6 ... Bits [12:14] are always 0, i.e. the address always ends in 0x8000 or 0x0000. 32-bit systems, as in the example above, are especially sensitive to this issue because 32-bit randomness for VA space is 8 bits (see mmap_rnd()). With the Bulldozer special case, this diminishes to only 32 different slots of mmap virtual addresses. This patch randomizes per boot the three affected bits rather than setting them to zero. Since all the shared pages have the same value at bits [12..14], there is no cache aliasing problems. This value gets generated during system boot and it is thus not known to a potential remote attacker. Therefore, the impact from the Bulldozer workaround gets diminished and ASLR randomness increased. 
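To make the effect concrete, here is a worked example with illustrative numbers only (the real mask and random value depend on the CPU and on each boot):

	/* Typical F15h: va_align.mask ends up covering bits [14:12]. */
	unsigned long mask = 0x7000;
	unsigned long rnd  = 0x12345678;	/* example get_random_int() value */
	unsigned long bits = rnd & mask;	/* = 0x5000 for this boot */

Every file mapping then carries 0x5000 in bits [14:12] for this boot: all mappings still share one I$ slice, so the cache-aliasing workaround is preserved, but the slice is no longer predictably zero.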
More details at: http://hmarco.org/bugs/AMD-Bulldozer-linux-ASLR-weakness-reducing-mmaped-files-by-eight.html Original white paper by AMD dealing with the issue: http://developer.amd.com/wordpress/media/2012/10/SharedL1InstructionCacheonAMD15hCPU.pdf Mentored-by: Ismael Ripoll Signed-off-by: Hector Marco-Gisbert Signed-off-by: Borislav Petkov Acked-by: Kees Cook Cc: Alexander Viro Cc: Andrew Morton Cc: H. Peter Anvin Cc: Jan-Simon Cc: Thomas Gleixner Cc: linux-fsdevel@vger.kernel.org Link: http://lkml.kernel.org/r/1427456301-3764-1-git-send-email-hecmargi@upv.es Signed-off-by: Ingo Molnar --- arch/x86/include/asm/elf.h | 1 + arch/x86/kernel/cpu/amd.c | 4 ++++ arch/x86/kernel/sys_x86_64.c | 30 +++++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index ca3347a9dab5..bd292ce9be0a 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -365,6 +365,7 @@ enum align_flags { struct va_alignment { int flags; unsigned long mask; + unsigned long bits; } ____cacheline_aligned; extern struct va_alignment va_align; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a220239cea65..ec6a61b21b41 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -488,6 +489,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) va_align.mask = (upperbit - 1) & PAGE_MASK; va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; + + /* A random value per boot for bit slice [12:upper_bit) */ + va_align.bits = get_random_int() & va_align.mask; } } diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 30277e27431a..10e0272d789a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -34,10 +34,26 @@ static unsigned long get_align_mask(void) return va_align.mask; } +/* + * To avoid aliasing in the I$ on AMD F15h, the bits defined by the + * va_align.bits, [12:upper_bit), are set to a random value instead of + * zeroing them. This random value is computed once per boot. This form + * of ASLR is known as "per-boot ASLR". + * + * To achieve this, the random value is added to the info.align_offset + * value before calling vm_unmapped_area() or ORed directly to the + * address. + */ +static unsigned long get_align_bits(void) +{ + return va_align.bits & get_align_mask(); +} + unsigned long align_vdso_addr(unsigned long addr) { unsigned long align_mask = get_align_mask(); - return (addr + align_mask) & ~align_mask; + addr = (addr + align_mask) & ~align_mask; + return addr | get_align_bits(); } static int __init control_va_addr_alignment(char *str) @@ -135,8 +151,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.length = len; info.low_limit = begin; info.high_limit = end; - info.align_mask = filp ? get_align_mask() : 0; + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; + if (filp) { + info.align_mask = get_align_mask(); + info.align_offset += get_align_bits(); + } return vm_unmapped_area(&info); } @@ -174,8 +194,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; - info.align_mask = filp ? 
get_align_mask() : 0; + info.align_mask = 0; info.align_offset = pgoff << PAGE_SHIFT; + if (filp) { + info.align_mask = get_align_mask(); + info.align_offset += get_align_bits(); + } addr = vm_unmapped_area(&info); if (!(addr & ~PAGE_MASK)) return addr; -- cgit v1.2.3
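For reference, the alignment math added to align_vdso_addr() above can be sanity-checked in isolation. A standalone user-space sketch with made-up values (in the kernel the mask and bits come from va_align and are per-CPU and per-boot; assumes a 64-bit build):

	#include <stdio.h>

	int main(void)
	{
		unsigned long align_mask = 0x7000;	/* bits [14:12], as on F15h */
		unsigned long align_bits = 0x5000;	/* per-boot random, example */
		unsigned long addr = 0x7f1234561000UL;	/* candidate address */

		addr = (addr + align_mask) & ~align_mask;	/* round up... */
		addr |= align_bits;			/* ...then OR in the per-boot slice */

		printf("%#lx\n", addr);			/* prints 0x7f123456d000 */
		return 0;
	}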