summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/dump_pagetables.c17
-rw-r--r--arch/x86/mm/fault.c12
-rw-r--r--arch/x86/mm/ident_map.c2
-rw-r--r--arch/x86/mm/init_64.c8
-rw-r--r--arch/x86/mm/kasan_init_64.c14
-rw-r--r--arch/x86/mm/kaslr.c8
-rw-r--r--arch/x86/mm/numa.c22
-rw-r--r--arch/x86/mm/pageattr.c44
-rw-r--r--arch/x86/mm/pkeys.c21
-rw-r--r--arch/x86/mm/pti.c26
-rw-r--r--arch/x86/mm/tlb.c2
11 files changed, 110 insertions, 66 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 62a7e9f65dec..2f3c9196b834 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
pgprotval_t eff_in, unsigned long P)
{
int i;
- pte_t *start;
+ pte_t *pte;
pgprotval_t prot, eff;
- start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- prot = pte_flags(*start);
- eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ pte = pte_offset_map(&addr, st->current_address);
+ prot = pte_flags(*pte);
+ eff = effective_prot(eff_in, prot);
note_page(m, st, __pgprot(prot), eff, 5);
- start++;
+ pte_unmap(pte);
}
}
#ifdef CONFIG_KASAN
@@ -359,7 +360,7 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
void *pt)
{
if (__pa(pt) == __pa(kasan_zero_pmd) ||
- (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
+ (pgtable_l5_enabled() && __pa(pt) == __pa(kasan_zero_p4d)) ||
__pa(pt) == __pa(kasan_zero_pud)) {
pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
note_page(m, st, __pgprot(prot), 0, 5);
@@ -475,8 +476,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
}
}
-#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
-#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
+#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
static inline bool is_hypervisor_range(int idx)
{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 73bd8c95ac71..9a84a0d08727 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -209,6 +209,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
unsigned lsb = 0;
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = si_signo;
info.si_errno = 0;
info.si_code = si_code;
@@ -439,7 +440,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_k))
return -1;
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_k);
arch_flush_lazy_mmu_mode();
@@ -454,7 +455,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (p4d_none(*p4d_k))
return -1;
- if (p4d_none(*p4d) && !pgtable_l5_enabled) {
+ if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
set_p4d(p4d, *p4d_k);
arch_flush_lazy_mmu_mode();
} else {
@@ -828,6 +829,8 @@ static inline void
show_signal_msg(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct task_struct *tsk)
{
+ const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;
+
if (!unhandled_signal(tsk, SIGSEGV))
return;
@@ -835,13 +838,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
return;
printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address,
+ loglvl, tsk->comm, task_pid_nr(tsk), address,
(void *)regs->ip, (void *)regs->sp, error_code);
print_vma_addr(KERN_CONT " in ", regs->ip);
printk(KERN_CONT "\n");
+
+ show_opcodes((u8 *)regs->ip, loglvl);
}
static void
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index a2f0c7e20fb0..fe7a12599d8e 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -123,7 +123,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else {
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0a400606dea0..17383f9677fa 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -180,7 +180,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
*/
void sync_global_pgds(unsigned long start, unsigned long end)
{
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
sync_global_pgds_l5(start, end);
else
sync_global_pgds_l4(start, end);
@@ -643,7 +643,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
@@ -723,7 +723,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask);
spin_lock(&init_mm.page_table_lock);
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
pgd_populate(&init_mm, pgd, p4d);
else
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
@@ -1100,7 +1100,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
free_pud_table(pud_base, p4d);
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 980dbebd0ca7..e3e77527f8df 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -2,10 +2,8 @@
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
-#ifdef CONFIG_X86_5LEVEL
-/* Too early to use cpu_feature_enabled() */
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
#include <linux/bootmem.h>
#include <linux/kasan.h>
@@ -182,7 +180,7 @@ static void __init clear_pgds(unsigned long start,
* With folded p4d, pgd_clear() is nop, use p4d_clear()
* instead.
*/
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
pgd_clear(pgd);
else
p4d_clear(p4d_offset(pgd, start));
@@ -197,7 +195,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{
unsigned long p4d;
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
@@ -284,7 +282,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);
- for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
+ for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt);
@@ -315,7 +313,7 @@ void __init kasan_init(void)
* bunch of things like kernel code, modules, EFI mapping, etc.
* We need to take extra steps to not overwrite them.
*/
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
void *ptr;
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 615cc03ced84..61db77b0eda9 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -78,7 +78,7 @@ void __init kernel_randomize_memory(void)
struct rnd_state rand_state;
unsigned long remain_entropy;
- vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+ vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr = vaddr_start;
/*
@@ -124,7 +124,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
entropy = (rand % (entropy + 1)) & P4D_MASK;
else
entropy = (rand % (entropy + 1)) & PUD_MASK;
@@ -136,7 +136,7 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
vaddr = round_up(vaddr + 1, P4D_SIZE);
else
vaddr = round_up(vaddr + 1, PUD_SIZE);
@@ -212,7 +212,7 @@ void __meminit init_trampoline(void)
return;
}
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
init_trampoline_p4d();
else
init_trampoline_pud();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 25504d5aa816..fa150855647c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -136,13 +136,13 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
/* whine about and ignore invalid blks */
if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
+ pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start, end - 1);
return 0;
}
if (mi->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("NUMA: too many memblk ranges\n");
+ pr_err("too many memblk ranges\n");
return -EINVAL;
}
@@ -267,14 +267,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
*/
if (bi->end > bj->start && bi->start < bj->end) {
if (bi->nid != bj->nid) {
- pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
+ pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1,
bj->nid, bj->start, bj->end - 1);
return -EINVAL;
}
- pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1,
- bj->start, bj->end - 1);
+ pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1,
+ bj->start, bj->end - 1);
}
/*
@@ -364,7 +364,7 @@ static int __init numa_alloc_distance(void)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
size, PAGE_SIZE);
if (!phys) {
- pr_warning("NUMA: Warning: can't allocate distance table!\n");
+ pr_warn("Warning: can't allocate distance table!\n");
/* don't retry until explicitly reset */
numa_distance = (void *)1LU;
return -ENOMEM;
@@ -410,14 +410,14 @@ void __init numa_set_distance(int from, int to, int distance)
if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
from < 0 || to < 0) {
- pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
- from, to, distance);
+ pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
}
if ((u8)distance != distance ||
(from == to && distance != LOCAL_DISTANCE)) {
- pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
from, to, distance);
return;
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0f3d50f4c48c..3bded76e8d5c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file *m)
static inline void split_page_count(int level) { }
#endif
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr < end;
+}
+
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr <= end;
+}
+
#ifdef CONFIG_X86_64
static inline unsigned long highmap_start_pfn(void)
@@ -106,20 +118,25 @@ static inline unsigned long highmap_end_pfn(void)
return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
}
-#endif
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
{
- return addr >= start && addr < end;
+ /*
+ * Kernel text has an alias mapping at a high address, known
+ * here as "highmap".
+ */
+ return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
}
-static inline int
-within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+#else
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
{
- return addr >= start && addr <= end;
+ /* There is no highmap on 32-bit */
+ return false;
}
+#endif
+
/*
* Flushing functions
*/
@@ -172,7 +189,7 @@ static void __cpa_flush_all(void *arg)
static void cpa_flush_all(unsigned long cache)
{
- BUG_ON(irqs_disabled());
+ BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
@@ -236,7 +253,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
#endif
- BUG_ON(irqs_disabled());
+ BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
@@ -1183,6 +1200,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
cpa->numpages = 1;
cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
return 0;
+
+ } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
+ /* Faults in the highmap are OK, so do not warn: */
+ return -EFAULT;
} else {
WARN(1, KERN_WARNING "CPA: called for zero pte. "
"vaddr = %lx cpa->vaddr = %lx\n", vaddr,
@@ -1335,8 +1356,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
* to touch the high mapped kernel as well:
*/
if (!within(vaddr, (unsigned long)_text, _brk_end) &&
- within_inclusive(cpa->pfn, highmap_start_pfn(),
- highmap_end_pfn())) {
+ __cpa_pfn_in_highmap(cpa->pfn)) {
unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
__START_KERNEL_map - phys_base;
alias_cpa = *cpa;
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index d7bc0eea20a5..6e98e0a7c923 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
*/
if (pkey != -1)
return pkey;
- /*
- * Look for a protection-key-drive execute-only mapping
- * which is now being given permissions that are not
- * execute-only. Move it back to the default pkey.
- */
- if (vma_is_pkey_exec_only(vma) &&
- (prot & (PROT_READ|PROT_WRITE))) {
- return 0;
- }
+
/*
* The mapping is execute-only. Go try to get the
* execute-only protection key. If we fail to do that,
* fall through as if we do not have execute-only
- * support.
+ * support in this mm.
*/
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;
+ } else if (vma_is_pkey_exec_only(vma)) {
+ /*
+ * Protections are *not* PROT_EXEC, but the mapping
+ * is using the exec-only pkey. This mapping was
+ * PROT_EXEC and will no longer be. Move back to
+ * the default pkey.
+ */
+ return ARCH_DEFAULT_PKEY;
}
+
/*
* This is a vanilla, non-pkey mprotect (or we failed to
* setup execute-only), inherit the pkey from the VMA we
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index f1fd52f449e0..4d418e705878 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -421,6 +421,16 @@ static inline bool pti_kernel_image_global_ok(void)
if (boot_cpu_has(X86_FEATURE_K8))
return false;
+ /*
+ * RANDSTRUCT derives its hardening benefits from the
+ * attacker's lack of knowledge about the layout of kernel
+ * data structures. Keep the kernel image non-global in
+ * cases where RANDSTRUCT is in use to help keep the layout a
+ * secret.
+ */
+ if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+ return false;
+
return true;
}
@@ -430,12 +440,24 @@ static inline bool pti_kernel_image_global_ok(void)
*/
void pti_clone_kernel_text(void)
{
+ /*
+ * rodata is part of the kernel image and is normally
+ * readable on the filesystem or on the web. But, do not
+ * clone the areas past rodata, they might contain secrets.
+ */
unsigned long start = PFN_ALIGN(_text);
- unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+ unsigned long end = (unsigned long)__end_rodata_hpage_align;
if (!pti_kernel_image_global_ok())
return;
+ pr_debug("mapping partial kernel image into user address space\n");
+
+ /*
+ * Note that this will undo _some_ of the work that
+ * pti_set_kernel_image_nonglobal() did to clear the
+ * global bit.
+ */
pti_clone_pmds(start, end, _PAGE_RW);
}
@@ -458,8 +480,6 @@ void pti_set_kernel_image_nonglobal(void)
if (pti_kernel_image_global_ok())
return;
- pr_debug("set kernel image non-global\n");
-
set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e055d1a06699..6eb1f34c3c85 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
unsigned long sp = current_stack_pointer;
pgd_t *pgd = pgd_offset(mm, sp);
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp);