From 414ebf148cb5c5fa727ec51fdb69c4ab82dccf3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Oct 2022 21:39:43 +0200 Subject: x86/mm: Do verify W^X at boot up Straight up revert of commit: a970174d7a10 ("x86/mm: Do not verify W^X at boot up") now that the root cause has been fixed. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221025201058.011279208@infradead.org --- arch/x86/mm/pat/set_memory.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 2e5a045731de..97342c42dda8 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -587,10 +587,6 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star { unsigned long end; - /* Kernel text is rw at boot up */ - if (system_state == SYSTEM_BOOTING) - return new; - /* * 32-bit has some unfixable W+X issues, like EFI code * and writeable data being in the same page. Disable -- cgit v1.2.3 From 60463628c9e0a8060ac6bef0457b0505c7532c7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 29 Oct 2022 13:19:31 +0200 Subject: x86/mm: Implement native set_memory_rox() Provide a native implementation of set_memory_rox(), avoiding the double set_memory_ro();set_memory_x(); calls. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/set_memory.h | 3 +++ arch/x86/mm/pat/set_memory.c | 10 ++++++++++ include/linux/set_memory.h | 2 ++ 3 files changed, 15 insertions(+) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index b45c4d27fd46..a5e89641bd2d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -6,6 +6,9 @@ #include #include +#define set_memory_rox set_memory_rox +int set_memory_rox(unsigned long addr, int numpages); + /* * The set_memory_* API can be used to change various attributes of a virtual * address range. The attributes include: diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 97342c42dda8..f275605892df 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2025,6 +2025,16 @@ int set_memory_ro(unsigned long addr, int numpages) return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); } +int set_memory_rox(unsigned long addr, int numpages) +{ + pgprot_t clr = __pgprot(_PAGE_RW); + + if (__supported_pte_mask & _PAGE_NX) + clr.pgprot |= _PAGE_NX; + + return change_page_attr_clear(&addr, numpages, clr, 0); +} + int set_memory_rw(unsigned long addr, int numpages) { return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 023ebc67a36c..95ac8398ee72 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -14,6 +14,7 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +#ifndef set_memory_rox static inline int set_memory_rox(unsigned long addr, int numpages) { int ret = set_memory_ro(addr, numpages); @@ -21,6 +22,7 @@ static inline int set_memory_rox(unsigned long addr, int numpages) return ret; return set_memory_x(addr, numpages); } +#endif #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP static inline int set_direct_map_invalid_noflush(struct page *page) -- cgit v1.2.3 From 82328227db8f0b9b5f77bb5afcd47e59d0e4d08f Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Mon, 16 May 2022 18:52:02 +0000 Subject: x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE macros Other architectures and the common mm/ use P*D_MASK, and P*D_SIZE. Remove the duplicated P*D_PAGE_MASK and P*D_PAGE_SIZE which are only used in x86/*. Signed-off-by: Pasha Tatashin Signed-off-by: Borislav Petkov Reviewed-by: Anshuman Khandual Acked-by: Mike Rapoport Link: https://lore.kernel.org/r/20220516185202.604654-1-tatashin@google.com --- arch/x86/include/asm/page_types.h | 12 +++--------- arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/mm/mem_encrypt_boot.S | 4 ++-- arch/x86/mm/mem_encrypt_identity.c | 18 +++++++++--------- arch/x86/mm/pat/set_memory.c | 6 +++--- arch/x86/mm/pti.c | 2 +- 7 files changed, 20 insertions(+), 26 deletions(-) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index a506a411474d..86bd4311daf8 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -11,20 +11,14 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast *PAGE_MASK to a signed type so that it is sign-extended if +/* Cast P*D_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) -#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 19a0207e529f..56a917df410d 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) } a = aper + iommu_size; - iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; + iommu_size -= round_up(a, PMD_SIZE) - a; if (iommu_size < 64*1024*1024) { pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6a3cfaf6b72a..387e4b12e823 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -203,7 +203,7 @@ unsigned long __head __startup_64(unsigned long physaddr, load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map); /* Is the address not 2M aligned? */ - if (load_delta & ~PMD_PAGE_MASK) + if (load_delta & ~PMD_MASK) for (;;); /* Include the SME encryption mask in the fixup value */ diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 9de3d900bc92..e25288ee33c2 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute) * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) - * - intermediate copy buffer (PMD_PAGE_SIZE) + * - intermediate copy buffer (PMD_SIZE) * R8 - physical address of the pagetables to use for encryption */ @@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy) wbinvd /* Invalidate any cache entries */ /* Copy/encrypt up to 2MB at a time */ - movq $PMD_PAGE_SIZE, %r12 + movq $PMD_SIZE, %r12 1: cmpq %r12, %r9 jnb 2f diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index f415498d3175..88cccd65029d 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -93,7 +93,7 @@ struct sme_populate_pgd_data { * section is 2MB aligned to allow for simple pagetable setup using only * PMD entries (see vmlinux.lds.S). */ -static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch"); +static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch"); static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; @@ -198,8 +198,8 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); - ppd->vaddr += PMD_PAGE_SIZE; - ppd->paddr += PMD_PAGE_SIZE; + ppd->vaddr += PMD_SIZE; + ppd->paddr += PMD_SIZE; } } @@ -225,11 +225,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, vaddr_end = ppd->vaddr_end; /* If start is not 2MB aligned, create PTE entries */ - ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE); __sme_map_range_pte(ppd); /* Create PMD entries */ - ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + ppd->vaddr_end = vaddr_end & PMD_MASK; __sme_map_range_pmd(ppd); /* If end is not 2MB aligned, create PTE entries */ @@ -325,7 +325,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) /* Physical addresses gives us the identity mapped virtual addresses */ kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); + kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE); kernel_len = kernel_end - kernel_start; initrd_start = 0; @@ -355,12 +355,12 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * executable encryption area size: * stack page (PAGE_SIZE) * encryption routine page (PAGE_SIZE) - * intermediate copy buffer (PMD_PAGE_SIZE) + * intermediate copy buffer (PMD_SIZE) * pagetable structures for the encryption of the kernel * pagetable structures for workarea (in case not currently mapped) */ execute_start = workarea_start; - execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; execute_len = execute_end - execute_start; /* @@ -383,7 +383,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp) * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); + workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE); /* * Set the address to the start of where newly created pagetable diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index f275605892df..06eb8910462f 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -743,11 +743,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) switch (level) { case PG_LEVEL_1G: phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PUD_PAGE_MASK; + offset = virt_addr & ~PUD_MASK; break; case PG_LEVEL_2M: phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; - offset = virt_addr & ~PMD_PAGE_MASK; + offset = virt_addr & ~PMD_MASK; break; default: phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; @@ -1037,7 +1037,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, case PG_LEVEL_1G: ref_prot = pud_pgprot(*(pud_t *)kpte); ref_pfn = pud_pfn(*(pud_t *)kpte); - pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + pfninc = PMD_SIZE >> PAGE_SHIFT; lpaddr = address & PUD_MASK; lpinc = PMD_SIZE; /* diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ffe3b3a087fe..78414c6d1b5e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void) * of the image. */ unsigned long start = PFN_ALIGN(_text); - unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE); + unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE); /* * This clears _PAGE_GLOBAL from the entire kernel image. -- cgit v1.2.3 From 5ceeee7571b7628f439ae0444ec41d132558f47e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:33:50 +0100 Subject: x86/mm: Add a few comments It's a shame to hide useful comments in Changelogs, add some to the code. Shamelessly stolen from commit: c40a56a7818c ("x86/mm/init: Remove freed kernel image areas from alias mapping") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.460677011%40infradead.org --- arch/x86/mm/pat/set_memory.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 06eb8910462f..50f81ea1fbad 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -219,6 +219,23 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end) #ifdef CONFIG_X86_64 +/* + * The kernel image is mapped into two places in the virtual address space + * (addresses without KASLR, of course): + * + * 1. The kernel direct map (0xffff880000000000) + * 2. The "high kernel map" (0xffffffff81000000) + * + * We actually execute out of #2. If we get the address of a kernel symbol, it + * points to #2, but almost all physical-to-virtual translations point to #1. + * + * This is so that we can have both a directmap of all physical memory *and* + * take full advantage of the the limited (s32) immediate addressing range (2G) + * of x86_64. + * + * See Documentation/x86/x86_64/mm.rst for more detail. + */ + static inline unsigned long highmap_start_pfn(void) { return __pa_symbol(_text) >> PAGE_SHIFT; @@ -1626,6 +1643,9 @@ repeat: static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); +/* + * Check the directmap and "high kernel map" 'aliases'. + */ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; -- cgit v1.2.3 From ef9ab81af6e1f7b7ff589aa1504434aa5915c1df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:33:54 +0100 Subject: x86/mm: Untangle __change_page_attr_set_clr(.checkalias) The .checkalias argument to __change_page_attr_set_clr() is overloaded and serves two different purposes: - it inhibits the call to cpa_process_alias() -- as suggested by the name; however, - it also serves as 'primary' indicator for __change_page_attr() ( which in turn also serves as a recursion terminator for cpa_process_alias() ). Untangle these by extending the use of CPA_NO_CHECK_ALIAS to all callsites that currently use .checkalias=0 for this purpose. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.527267183%40infradead.org --- arch/x86/mm/pat/set_memory.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 50f81ea1fbad..4943f6c5d8d2 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1727,7 +1727,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) if (ret) goto out; - if (checkalias) { + if (checkalias && !(cpa->flags & CPA_NO_CHECK_ALIAS)) { ret = cpa_process_alias(cpa); if (ret) goto out; @@ -1801,18 +1801,12 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa.numpages = numpages; cpa.mask_set = mask_set; cpa.mask_clr = mask_clr; - cpa.flags = 0; + cpa.flags = in_flag; cpa.curpage = 0; cpa.force_split = force_split; - if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) - cpa.flags |= in_flag; - /* No alias checking for _NX bit modifications */ checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; - /* Has caller explicitly disabled alias checking? */ - if (in_flag & CPA_NO_CHECK_ALIAS) - checkalias = 0; ret = __change_page_attr_set_clr(&cpa, checkalias); @@ -2067,11 +2061,9 @@ int set_memory_np(unsigned long addr, int numpages) int set_memory_np_noalias(unsigned long addr, int numpages) { - int cpa_flags = CPA_NO_CHECK_ALIAS; - return change_page_attr_set_clr(&addr, numpages, __pgprot(0), __pgprot(_PAGE_PRESENT), 0, - cpa_flags, NULL); + CPA_NO_CHECK_ALIAS, NULL); } int set_memory_4k(unsigned long addr, int numpages) @@ -2288,7 +2280,7 @@ static int __set_pages_p(struct page *page, int numpages) .numpages = numpages, .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), .mask_clr = __pgprot(0), - .flags = 0}; + .flags = CPA_NO_CHECK_ALIAS }; /* * No alias checking needed for setting present flag. otherwise, @@ -2296,7 +2288,7 @@ static int __set_pages_p(struct page *page, int numpages) * mappings (this adds to complexity if we want to do this from * atomic context especially). Let's keep it simple! */ - return __change_page_attr_set_clr(&cpa, 0); + return __change_page_attr_set_clr(&cpa, 1); } static int __set_pages_np(struct page *page, int numpages) @@ -2307,7 +2299,7 @@ static int __set_pages_np(struct page *page, int numpages) .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .flags = 0}; + .flags = CPA_NO_CHECK_ALIAS }; /* * No alias checking needed for setting not present flag. otherwise, @@ -2315,7 +2307,7 @@ static int __set_pages_np(struct page *page, int numpages) * mappings (this adds to complexity if we want to do this from * atomic context especially). Let's keep it simple! */ - return __change_page_attr_set_clr(&cpa, 0); + return __change_page_attr_set_clr(&cpa, 1); } int set_direct_map_invalid_noflush(struct page *page) @@ -2386,7 +2378,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), - .flags = 0, + .flags = CPA_NO_CHECK_ALIAS, }; WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); @@ -2399,7 +2391,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); - retval = __change_page_attr_set_clr(&cpa, 0); + retval = __change_page_attr_set_clr(&cpa, 1); __flush_tlb_all(); out: @@ -2429,12 +2421,12 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address, .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), - .flags = 0, + .flags = CPA_NO_CHECK_ALIAS, }; WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP"); - retval = __change_page_attr_set_clr(&cpa, 0); + retval = __change_page_attr_set_clr(&cpa, 1); __flush_tlb_all(); return retval; -- cgit v1.2.3 From d597416683d587e940faa35945fba162329b5a71 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:33:57 +0100 Subject: x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias() There is a cludge in change_page_attr_set_clr() that inhibits propagating NX changes to the aliases (directmap and highmap) -- this is a cludge twofold: - it also inhibits the primary checks in __change_page_attr(); - it hard depends on single bit changes. The introduction of set_memory_rox() triggered this last issue for clearing both _PAGE_RW and _PAGE_NX. Explicitly ignore _PAGE_NX in cpa_process_alias() instead. Fixes: b38994948567 ("x86/mm: Implement native set_memory_rox()") Reported-by: kernel test robot Debugged-by: Dave Hansen Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.594991716%40infradead.org --- arch/x86/mm/pat/set_memory.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 4943f6c5d8d2..beef77417115 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1669,6 +1669,12 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + /* Directmap always has NX set, do not modify. */ + if (__supported_pte_mask & _PAGE_NX) { + alias_cpa.mask_clr.pgprot &= ~_PAGE_NX; + alias_cpa.mask_set.pgprot &= ~_PAGE_NX; + } + cpa->force_flush_all = 1; ret = __change_page_attr_set_clr(&alias_cpa, 0); @@ -1691,6 +1697,15 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + /* + * [_text, _brk_end) also covers data, do not modify NX except + * in cases where the highmap is the primary target. + */ + if (__supported_pte_mask & _PAGE_NX) { + alias_cpa.mask_clr.pgprot &= ~_PAGE_NX; + alias_cpa.mask_set.pgprot &= ~_PAGE_NX; + } + cpa->force_flush_all = 1; /* * The high mapping range is imprecise, so ignore the @@ -1709,6 +1724,12 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) unsigned long rempages = numpages; int ret = 0; + /* + * No changes, easy! + */ + if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr))) + return ret; + while (rempages) { /* * Store the remaining nr of pages for the large page @@ -1755,7 +1776,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, struct page **pages) { struct cpa_data cpa; - int ret, cache, checkalias; + int ret, cache; memset(&cpa, 0, sizeof(cpa)); @@ -1805,10 +1826,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa.curpage = 0; cpa.force_split = force_split; - /* No alias checking for _NX bit modifications */ - checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; - - ret = __change_page_attr_set_clr(&cpa, checkalias); + ret = __change_page_attr_set_clr(&cpa, 1); /* * Check whether we really changed something: -- cgit v1.2.3 From e996365ee7475805d2a01312532855004e89df84 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Nov 2022 13:34:00 +0100 Subject: x86/mm: Rename __change_page_attr_set_clr(.checkalias) Now that the checkalias functionality is taken by CPA_NO_CHECK_ALIAS rename the argument to better match is remaining purpose: primary, matching __change_page_attr(). Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110125544.661001508%40infradead.org --- arch/x86/mm/pat/set_memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index beef77417115..220361ceb997 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1641,7 +1641,7 @@ repeat: return err; } -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); +static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary); /* * Check the directmap and "high kernel map" 'aliases'. @@ -1718,7 +1718,7 @@ static int cpa_process_alias(struct cpa_data *cpa) return 0; } -static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) +static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary) { unsigned long numpages = cpa->numpages; unsigned long rempages = numpages; @@ -1742,13 +1742,13 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) if (!debug_pagealloc_enabled()) spin_lock(&cpa_lock); - ret = __change_page_attr(cpa, checkalias); + ret = __change_page_attr(cpa, primary); if (!debug_pagealloc_enabled()) spin_unlock(&cpa_lock); if (ret) goto out; - if (checkalias && !(cpa->flags & CPA_NO_CHECK_ALIAS)) { + if (primary && !(cpa->flags & CPA_NO_CHECK_ALIAS)) { ret = cpa_process_alias(cpa); if (ret) goto out; -- cgit v1.2.3 From 3e844d842d49cdbe61a4b338bdd512654179488a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 18 Nov 2022 07:16:16 -0800 Subject: x86/mm: Ensure forced page table splitting There are a few kernel users like kfence that require 4k pages to work correctly and do not support large mappings. They use set_memory_4k() to break down those large mappings. That, in turn relies on cpa_data->force_split option to indicate to set_memory code that it should split page tables regardless of whether the need to be. But, a recent change added an optimization which would return early if a set_memory request came in that did not change permissions. It did not consult ->force_split and would mistakenly optimize away the splitting that set_memory_4k() needs. This broke kfence. Skip the same-permission optimization when ->force_split is set. Fixes: 127960a05548 ("x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias()") Signed-off-by: Dave Hansen Tested-by: Marco Elver Cc: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/CA+G9fYuFxZTxkeS35VTZMXwQvohu73W3xbZ5NtjebsVvH6hCuA@mail.gmail.com/ --- arch/x86/mm/pat/set_memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm/pat/set_memory.c') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 220361ceb997..0db69514fe29 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1727,7 +1727,8 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary) /* * No changes, easy! */ - if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr))) + if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) && + !cpa->force_split) return ret; while (rempages) { -- cgit v1.2.3