From f72a85e347810c76eb246e68751233aad3b84ac8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 4 Dec 2017 07:49:11 +0530 Subject: powerpc/mm/book3s/64: Add proper pte access check helper pte_access_premitted get called in get_user_pages_fast path. If we have marked the pte PROT_NONE, we should not allow a read access on the address. With the current implementation we are not checking the READ and only check for WRITE. This is needed on archs like ppc64 that implement PROT_NONE using RWX access instead of _PAGE_PRESENT. Also add pte_user check just to make sure we are not accessing kernel mapping. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 44697817ccc6..23b03449e2f1 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -546,6 +546,30 @@ static inline int pte_present(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } + +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + unsigned long pteval = pte_val(pte); + /* Also check for pte_user */ + unsigned long clear_pte_bits = _PAGE_PRIVILEGED; + /* + * _PAGE_READ is needed for any access and will be + * cleared for PROT_NONE + */ + unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ; + + if (write) + need_pte_bits |= _PAGE_WRITE; + + if ((pteval & need_pte_bits) != need_pte_bits) + return false; + + if ((pteval & clear_pte_bits) == clear_pte_bits) + return false; + return true; +} + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -850,6 +874,11 @@ static inline int pud_bad(pud_t pud) return hash__pud_bad(pud); } +#define pud_access_permitted pud_access_permitted +static inline bool pud_access_permitted(pud_t pud, bool write) +{ + return pte_access_permitted(pud_pte(pud), write); +} #define pgd_write(pgd) pte_write(pgd_pte(pgd)) static inline void pgd_set(pgd_t *pgdp, unsigned long val) @@ -889,6 +918,12 @@ static inline int pgd_bad(pgd_t pgd) return hash__pgd_bad(pgd); } +#define pgd_access_permitted pgd_access_permitted +static inline bool pgd_access_permitted(pgd_t pgd, bool write) +{ + return pte_access_permitted(pgd_pte(pgd), write); +} + extern struct page *pgd_page(pgd_t pgd); /* Pointers in the page table tree are physical addresses */ @@ -1009,6 +1044,12 @@ static inline int pmd_protnone(pmd_t pmd) #define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) +#define pmd_access_permitted pmd_access_permitted +static inline bool pmd_access_permitted(pmd_t pmd, bool write) +{ + return pte_access_permitted(pmd_pte(pmd), write); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); -- cgit v1.2.3 From 812fadcb941a81d1f3948b10a95a4dce663da3e4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:27 +0100 Subject: powerpc/mm: extend _PAGE_PRIVILEGED to all CPUs commit ac29c64089b74 ("powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED") introduced _PAGE_PRIVILEGED for BOOK3S/64 This patch generalises _PAGE_PRIVILEGED for all CPUs, allowing to have either _PAGE_PRIVILEGED or _PAGE_USER or both. PPC_8xx has a _PAGE_SHARED flag which is set for and only for all non user pages. Lets rename it _PAGE_PRIVILEGED to remove confusion as it has nothing to do with Linux shared pages. On BookE, there's a _PAGE_BAP_SR which has to be set for kernel pages: defining _PAGE_PRIVILEGED as _PAGE_BAP_SR will make this generic Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 +- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 10 +--------- arch/powerpc/include/asm/nohash/pte-book3e.h | 1 + arch/powerpc/include/asm/pte-common.h | 24 ++++++++++++++++-------- arch/powerpc/kernel/head_8xx.S | 6 +++--- arch/powerpc/mm/8xx_mmu.c | 2 +- arch/powerpc/mm/dump_linuxpagetables.c | 11 +---------- arch/powerpc/mm/pgtable.c | 3 ++- arch/powerpc/mm/pgtable_32.c | 9 +-------- arch/powerpc/mm/pgtable_64.c | 14 +------------- 10 files changed, 28 insertions(+), 54 deletions(-) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 23b03449e2f1..8e28febedac3 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -15,7 +15,7 @@ #define _PAGE_BIT_SWAP_TYPE 0 #define _PAGE_RO 0 -#define _PAGE_SHARED 0 +#define _PAGE_USER 0 #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 19a5ecaef265..7c7040f015e2 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -31,7 +31,7 @@ /* Definitions for 8xx embedded chips. */ #define _PAGE_PRESENT 0x0001 /* Page is valid */ #define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ -#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */ +#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */ #define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ #define _PAGE_DIRTY 0x0100 /* C: page changed */ @@ -54,13 +54,5 @@ /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 -/* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO) -#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC) -#define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE) -#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE | _PAGE_EXEC) - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */ diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index 2da4532ca377..ccee8eb509bb 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -55,6 +55,7 @@ #define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) #define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX) #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ +#define _PAGE_PRIVILEGED (_PAGE_BAP_SR) #define _PAGE_HASHPTE 0 #define _PAGE_BUSY 0 diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index ce142ef99ba7..0e6595a1b9d8 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -8,9 +8,6 @@ #ifndef _PAGE_HASHPTE #define _PAGE_HASHPTE 0 #endif -#ifndef _PAGE_SHARED -#define _PAGE_SHARED 0 -#endif #ifndef _PAGE_HWWRITE #define _PAGE_HWWRITE 0 #endif @@ -45,6 +42,14 @@ #ifndef _PAGE_PTE #define _PAGE_PTE 0 #endif +/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */ +#ifndef _PAGE_PRIVILEGED +#define _PAGE_PRIVILEGED 0 +#else +#ifndef _PAGE_USER +#define _PAGE_USER 0 +#endif +#endif #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -54,16 +59,18 @@ #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE() #endif #ifndef _PAGE_KERNEL_RO -#define _PAGE_KERNEL_RO (_PAGE_RO) +#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO) #endif #ifndef _PAGE_KERNEL_ROX -#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO) +#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC) #endif #ifndef _PAGE_KERNEL_RW -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) +#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE) #endif #ifndef _PAGE_KERNEL_RWX -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC) +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE | _PAGE_EXEC) #endif #ifndef _PAGE_HPTEFLAGS #define _PAGE_HPTEFLAGS _PAGE_HASHPTE @@ -85,7 +92,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); */ static inline bool pte_user(pte_t pte) { - return (pte_val(pte) & _PAGE_USER) == _PAGE_USER; + return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER; } #endif /* __ASSEMBLY__ */ @@ -116,6 +123,7 @@ static inline bool pte_user(pte_t pte) #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \ _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ + _PAGE_PRIVILEGED | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6399dcadf51d..642680389b7e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -678,7 +678,7 @@ DTLBMissIMMR: mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT | _PAGE_NO_CACHE mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -696,7 +696,7 @@ DTLBMissLinear: li r11, MD_PS8MEG | MD_SVALID mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -715,7 +715,7 @@ ITLBMissLinear: li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index f29212e40f40..c2d9a6d709c3 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -67,7 +67,7 @@ void __init MMU_init_hw(void) /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ #ifdef CONFIG_PIN_TLB_DATA unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY; #ifdef CONFIG_PIN_TLB_IMMR int i = 29; #else diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 139dd1993eeb..d87ab2cabaa6 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -112,13 +112,8 @@ struct flag_info { static const struct flag_info flag_array[] = { { -#ifdef CONFIG_PPC_BOOK3S_64 - .mask = _PAGE_PRIVILEGED, - .val = 0, -#else - .mask = _PAGE_USER, + .mask = _PAGE_USER | _PAGE_PRIVILEGED, .val = _PAGE_USER, -#endif .set = "user", .clear = " ", }, { @@ -229,10 +224,6 @@ static const struct flag_info flag_array[] = { .mask = _PAGE_SPECIAL, .val = _PAGE_SPECIAL, .set = "special", - }, { - .mask = _PAGE_SHARED, - .val = _PAGE_SHARED, - .set = "shared", } }; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index a03ff3d99e0c..9f361ae571e9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -54,7 +54,8 @@ static inline int pte_looks_normal(pte_t pte) return 0; #else return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER | + _PAGE_PRIVILEGED)) == (_PAGE_PRESENT | _PAGE_USER); #endif } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index f6c7f54c0515..d35d9ad3c1cd 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -98,14 +98,7 @@ ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ flags &= ~(_PAGE_USER | _PAGE_EXEC); - -#ifdef _PAGE_BAP_SR - /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format - * which means that we just cleared supervisor access... oops ;-) This - * restores it - */ - flags |= _PAGE_BAP_SR; -#endif + flags |= _PAGE_PRIVILEGED; return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 813ea22c3e00..c9a623c2d8a2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -244,20 +244,8 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, /* * Force kernel mapping. */ -#if defined(CONFIG_PPC_BOOK3S_64) - flags |= _PAGE_PRIVILEGED; -#else flags &= ~_PAGE_USER; -#endif - - -#ifdef _PAGE_BAP_SR - /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format - * which means that we just cleared supervisor access... oops ;-) This - * restores it - */ - flags |= _PAGE_BAP_SR; -#endif + flags |= _PAGE_PRIVILEGED; if (ppc_md.ioremap) return ppc_md.ioremap(addr, size, flags, caller); -- cgit v1.2.3 From 351750331fc1580cdb60d2efef04238f5faa89fe Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jan 2018 13:45:29 +0100 Subject: powerpc/mm: Introduce _PAGE_NA Today, PAGE_NONE is defined as a page not having _PAGE_USER. In some circunstances, when the CPU supports it, it might be better to be able to flag a page with NO ACCESS. In a following patch, the 8xx will switch user access being flagged in the PMD, therefore it will not be possible anymore to use _PAGE_USER as a way to flag a page with no access. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- arch/powerpc/include/asm/pte-common.h | 7 +++++-- arch/powerpc/mm/dump_linuxpagetables.c | 18 +++++++++++------- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 8e28febedac3..60a0d7fd82bc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -14,6 +14,7 @@ */ #define _PAGE_BIT_SWAP_TYPE 0 +#define _PAGE_NA 0 #define _PAGE_RO 0 #define _PAGE_USER 0 diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index cc2bfec3aa3b..504a3c36ce5c 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -282,7 +282,7 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - unsigned long clr = ~pte_val(entry) & _PAGE_RO; + unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA); pte_update(ptep, clr, set); } diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 0e6595a1b9d8..426a902816c5 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -50,6 +50,9 @@ #define _PAGE_USER 0 #endif #endif +#ifndef _PAGE_NA +#define _PAGE_NA 0 +#endif #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT @@ -122,7 +125,7 @@ static inline bool pte_user(pte_t pte) /* Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \ - _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ + _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \ _PAGE_PRIVILEGED | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) @@ -150,7 +153,7 @@ static inline bool pte_user(pte_t pte) * * Note due to the way vm flags are laid out, the bits are XWR */ -#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) #define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ _PAGE_EXEC) diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index d87ab2cabaa6..876e2a3c79f2 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -117,16 +117,20 @@ static const struct flag_info flag_array[] = { .set = "user", .clear = " ", }, { -#if _PAGE_RO == 0 - .mask = _PAGE_RW, + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, .val = _PAGE_RW, -#else - .mask = _PAGE_RO, - .val = 0, -#endif .set = "rw", - .clear = "ro", }, { + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, + .val = _PAGE_RO, + .set = "ro", + }, { +#if _PAGE_NA != 0 + .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, + .val = _PAGE_RO, + .set = "na", + }, { +#endif .mask = _PAGE_EXEC, .val = _PAGE_EXEC, .set = " X ", -- cgit v1.2.3 From eb95d016ce3d3404e061c7c85d4362094dc34b3f Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:35 -0800 Subject: powerpc: map vma key-protection bits to pte key bits. Map the key protection bits of the vma to the pkey bits in the PTE. The PTE bits used for pkey are 3,4,5,6 and 57. The first four bits are the same four bits that were freed up initially in this patch series. remember? :-) Without those four bits this patch wouldn't be possible. BUT, on 4k kernel, bit 3, and 4 could not be freed up. remember? Hence we have to be satisfied with 5, 6 and 7. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 25 ++++++++++++++++++++++++- arch/powerpc/include/asm/mman.h | 6 ++++++ arch/powerpc/include/asm/pkeys.h | 12 ++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 60a0d7fd82bc..26285fc17baf 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -40,6 +40,7 @@ #define _RPAGE_RSV2 0x0800000000000000UL #define _RPAGE_RSV3 0x0400000000000000UL #define _RPAGE_RSV4 0x0200000000000000UL +#define _RPAGE_RSV5 0x00040UL #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */ @@ -59,6 +60,25 @@ /* Max physical address bit as per radix table */ #define _RPAGE_PA_MAX 57 +#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_64K_PAGES +#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 +#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 +#else /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */ +#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */ +#endif /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 +#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 +#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#else /* CONFIG_PPC_MEM_KEYS */ +#define H_PTE_PKEY_BIT0 0 +#define H_PTE_PKEY_BIT1 0 +#define H_PTE_PKEY_BIT2 0 +#define H_PTE_PKEY_BIT3 0 +#define H_PTE_PKEY_BIT4 0 +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * Max physical address bit we will use for now. * @@ -122,13 +142,16 @@ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ _PAGE_SOFT_DIRTY) + +#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ + H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* * Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | H_PTE_PKEY) /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 29994788b9f3..07e3f54de9e3 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -33,7 +33,13 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { +#ifdef CONFIG_PPC_MEM_KEYS + return (vm_flags & VM_SAO) ? + __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : + __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); +#else return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); +#endif } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 24e82260052f..03d8d5053126 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -41,6 +41,18 @@ static inline u64 pkey_to_vmflag_bits(u16 pkey) return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); } +static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + if (static_branch_likely(&pkey_disabled)) + return 0x0UL; + + return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL)); +} + static inline int vma_pkey(struct vm_area_struct *vma) { if (static_branch_likely(&pkey_disabled)) -- cgit v1.2.3 From f2407ef3ba225665ee24965f69bc84435fb590cf Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:37 -0800 Subject: powerpc: helper to validate key-access permissions of a pte helper function that checks if the read/write/execute is allowed on the pte. Signed-off-by: Ram Pai Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 ++ arch/powerpc/include/asm/pkeys.h | 9 +++++++++ arch/powerpc/mm/pkeys.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 26285fc17baf..449b797f8b7b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -571,6 +571,8 @@ static inline int pte_present(pte_t pte) return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } +extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); + #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index a8ef7fa5360e..2298771b066b 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -71,6 +71,15 @@ static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); } +static inline u16 pte_to_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL)); +} + #define pkey_alloc_mask(pkey) (0x1 << pkey) #define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 010c90c1f5da..5071778fdd20 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -358,3 +358,31 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, /* Nothing to override. */ return vma_pkey(vma); } + +static bool pkey_access_permitted(int pkey, bool write, bool execute) +{ + int pkey_shift; + u64 amr; + + if (!pkey) + return true; + + if (!is_pkey_enabled(pkey)) + return true; + + pkey_shift = pkeyshift(pkey); + if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) + return true; + + amr = read_amr(); /* Delay reading amr until absolutely needed */ + return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || + (write && !(amr & (AMR_WR_BIT << pkey_shift)))); +} + +bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + if (static_branch_likely(&pkey_disabled)) + return true; + + return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); +} -- cgit v1.2.3 From bca7aacfe8be121ecefb3be609420220b0820c2c Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Thu, 18 Jan 2018 17:50:38 -0800 Subject: powerpc: check key protection for user page access Make sure that the kernel does not access user pages without checking their key-protection. Signed-off-by: Ram Pai [mpe: Integrate with upstream version of pte_access_permitted()] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 449b797f8b7b..1c495068bcfa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -571,7 +571,14 @@ static inline int pte_present(pte_t pte) return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } +#ifdef CONFIG_PPC_MEM_KEYS extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); +#else +static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + return true; +} +#endif /* CONFIG_PPC_MEM_KEYS */ #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) @@ -593,7 +600,8 @@ static inline bool pte_access_permitted(pte_t pte, bool write) if ((pteval & clear_pte_bits) == clear_pte_bits) return false; - return true; + + return arch_pte_access_permitted(pte_val(pte), write, 0); } /* -- cgit v1.2.3 From 8cc931e03339eebbdbaa2ac1998d25a8a90b77d4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 31 Jan 2018 16:18:02 -0800 Subject: powerpc/mm: update pmdp_invalidate to return old pmd value It's required to avoid losing dirty and accessed bits. Link: http://lkml.kernel.org/r/20171213105756.69879-7-kirill.shutemov@linux.intel.com Signed-off-by: Aneesh Kumar K.V Signed-off-by: Kirill A. Shutemov Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 ++-- arch/powerpc/mm/pgtable-book3s64.c | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 44697817ccc6..ee19d5bbee06 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1137,8 +1137,8 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, } #define __HAVE_ARCH_PMDP_INVALIDATE -extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); +extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); #define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 3b65917785a5..422e80253a33 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -90,16 +90,19 @@ void serialize_against_pte_lookup(struct mm_struct *mm) * We use this to invalidate a pmdp entry before switching from a * hugepte to regular pmd entry. */ -void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, +pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); + unsigned long old_pmd; + + old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); /* * This ensures that generic code that rely on IRQ disabling * to prevent a parallel THP split work as expected. */ serialize_against_pte_lookup(vma->vm_mm); + return __pmd(old_pmd); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) -- cgit v1.2.3 From 423ac9af3ceff967a77b0714781033629593b077 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 31 Jan 2018 16:18:24 -0800 Subject: mm/thp: remove pmd_huge_split_prepare() Instead of marking the pmd ready for split, invalidate the pmd. This should take care of powerpc requirement. Only side effect is that we mark the pmd invalid early. This can result in us blocking access to the page a bit longer if we race against a thp split. [kirill.shutemov@linux.intel.com: rebased, dirty THP once] Link: http://lkml.kernel.org/r/20171213105756.69879-13-kirill.shutemov@linux.intel.com Signed-off-by: Aneesh Kumar K.V Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Catalin Marinas Cc: David Daney Cc: David Miller Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Michal Hocko Cc: Nitin Gupta Cc: Ralf Baechle Cc: Thomas Gleixner Cc: Vineet Gupta Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 2 - arch/powerpc/include/asm/book3s/64/hash-64k.h | 2 - arch/powerpc/include/asm/book3s/64/pgtable.h | 9 ---- arch/powerpc/include/asm/book3s/64/radix.h | 6 --- arch/powerpc/mm/pgtable-hash64.c | 22 -------- include/asm-generic/pgtable.h | 8 --- mm/huge_memory.c | 72 +++++++++++++-------------- 7 files changed, 35 insertions(+), 86 deletions(-) (limited to 'arch/powerpc/include/asm/book3s/64/pgtable.h') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 197ced1eaaa0..2d9df40446f6 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -101,8 +101,6 @@ extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); -extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 8d40cf03cb67..cb46d1034f33 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -203,8 +203,6 @@ extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); -extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ee19d5bbee06..6ca1208cedcb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1140,15 +1140,6 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE -static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - if (radix_enabled()) - return radix__pmdp_huge_split_prepare(vma, address, pmdp); - return hash__pmdp_huge_split_prepare(vma, address, pmdp); -} - #define pmd_move_must_withdraw pmd_move_must_withdraw struct spinlock; static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 19c44e1495ae..365010f66570 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -269,12 +269,6 @@ static inline pmd_t radix__pmd_mkhuge(pmd_t pmd) return __pmd(pmd_val(pmd) | _PAGE_PTE | R_PAGE_LARGE); return __pmd(pmd_val(pmd) | _PAGE_PTE); } -static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - /* Nothing to do for radix. */ - return; -} extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long clr, diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index ec277913e01b..469808e77e58 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -296,28 +296,6 @@ pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } -void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); - VM_BUG_ON(pmd_devmap(*pmdp)); - - /* - * We can't mark the pmd none here, because that will cause a race - * against exit_mmap. We need to continue mark pmd TRANS HUGE, while - * we spilt, but at the same time we wan't rest of the ppc64 code - * not to insert hash pte on this, because we will be modifying - * the deposited pgtable in the caller of this function. Hence - * clear the _PAGE_USER so that we move the fault handling to - * higher level function and that will serialize against ptl. - * We need to flush existing hash pte entries here even though, - * the translation is still valid, because we will withdraw - * pgtable_t after this. - */ - pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED); -} - /* * A linux hugepage PMD was changed and the corresponding hash table entries * neesd to be flushed. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 51eebd7546b2..2cfa3075d148 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -329,14 +329,6 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif -#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE -static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - -} -#endif - #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2a79a6b7d19b..87ab9b8f56b5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2063,7 +2063,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, struct mm_struct *mm = vma->vm_mm; struct page *page; pgtable_t pgtable; - pmd_t old, _pmd; + pmd_t old_pmd, _pmd; bool young, write, soft_dirty, pmd_migration = false; unsigned long addr; int i; @@ -2106,23 +2106,50 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return __split_huge_zero_page_pmd(vma, haddr, pmd); } + /* + * Up to this point the pmd is present and huge and userland has the + * whole access to the hugepage during the split (which happens in + * place). If we overwrite the pmd with the not-huge version pointing + * to the pte here (which of course we could if all CPUs were bug + * free), userland could trigger a small page size TLB miss on the + * small sized TLB while the hugepage TLB entry is still established in + * the huge TLB. Some CPU doesn't like that. + * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum + * 383 on page 93. Intel should be safe but is also warns that it's + * only safe if the permission and cache attributes of the two entries + * loaded in the two TLB is identical (which should be the case here). + * But it is generally safer to never allow small and huge TLB entries + * for the same virtual address to be loaded simultaneously. So instead + * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the + * current pmd notpresent (atomically because here the pmd_trans_huge + * must remain set at all times on the pmd until the split is complete + * for this pmd), then we flush the SMP TLB and finally we write the + * non-huge version of the pmd entry with pmd_populate. + */ + old_pmd = pmdp_invalidate(vma, haddr, pmd); + #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION - pmd_migration = is_pmd_migration_entry(*pmd); + pmd_migration = is_pmd_migration_entry(old_pmd); if (pmd_migration) { swp_entry_t entry; - entry = pmd_to_swp_entry(*pmd); + entry = pmd_to_swp_entry(old_pmd); page = pfn_to_page(swp_offset(entry)); } else #endif - page = pmd_page(*pmd); + page = pmd_page(old_pmd); VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); - write = pmd_write(*pmd); - young = pmd_young(*pmd); - soft_dirty = pmd_soft_dirty(*pmd); + if (pmd_dirty(old_pmd)) + SetPageDirty(page); + write = pmd_write(old_pmd); + young = pmd_young(old_pmd); + soft_dirty = pmd_soft_dirty(old_pmd); - pmdp_huge_split_prepare(vma, haddr, pmd); + /* + * Withdraw the table only after we mark the pmd entry invalid. + * This's critical for some architectures (Power). + */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); @@ -2176,35 +2203,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, } smp_wmb(); /* make pte visible before pmd */ - /* - * Up to this point the pmd is present and huge and userland has the - * whole access to the hugepage during the split (which happens in - * place). If we overwrite the pmd with the not-huge version pointing - * to the pte here (which of course we could if all CPUs were bug - * free), userland could trigger a small page size TLB miss on the - * small sized TLB while the hugepage TLB entry is still established in - * the huge TLB. Some CPU doesn't like that. - * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum - * 383 on page 93. Intel should be safe but is also warns that it's - * only safe if the permission and cache attributes of the two entries - * loaded in the two TLB is identical (which should be the case here). - * But it is generally safer to never allow small and huge TLB entries - * for the same virtual address to be loaded simultaneously. So instead - * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the - * current pmd notpresent (atomically because here the pmd_trans_huge - * must remain set at all times on the pmd until the split is complete - * for this pmd), then we flush the SMP TLB and finally we write the - * non-huge version of the pmd entry with pmd_populate. - */ - old = pmdp_invalidate(vma, haddr, pmd); - - /* - * Transfer dirty bit using value returned by pmd_invalidate() to be - * sure we don't race with CPU that can set the bit under us. - */ - if (pmd_dirty(old)) - SetPageDirty(page); - pmd_populate(mm, pmd, pgtable); if (freeze) { -- cgit v1.2.3