Diffstat (limited to 'arch/powerpc/mm')
33 files changed, 538 insertions, 389 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 61ac468c87c6..b9cf6f8764b0 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -93,7 +93,7 @@ void __init MMU_init_hw(void)
 #define LARGE_PAGE_SIZE_16M	(1<<24)
 #define LARGE_PAGE_SIZE_4M	(1<<22)
 
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
 	unsigned long v, s, mapped;
 	phys_addr_t p;
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index ea2b9af08a48..aad127acdbaa 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -170,7 +170,7 @@ void __init MMU_init_hw(void)
 	flush_instruction_cache();
 }
 
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
 	unsigned long addr;
 	unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index bfa503cff351..fe1f6443d57f 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -66,26 +66,22 @@ unsigned long p_block_mapped(phys_addr_t pa)
 void __init MMU_init_hw(void)
 {
 	/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
-#ifdef CONFIG_PIN_TLB_DATA
-	unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
-	unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
-#ifdef CONFIG_PIN_TLB_IMMR
-	int i = 29;
-#else
-	int i = 28;
-#endif
-	unsigned long addr = 0;
-	unsigned long mem = total_lowmem;
-
-	for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
-		mtspr(SPRN_MD_CTR, ctr | (i << 8));
-		mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
-		mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
-		mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
-		addr += LARGE_PAGE_SIZE_8M;
-		mem -= LARGE_PAGE_SIZE_8M;
+	if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) {
+		unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
+		unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
+		int i = IS_ENABLED(CONFIG_PIN_TLB_IMMR) ? 29 : 28;
+		unsigned long addr = 0;
+		unsigned long mem = total_lowmem;
+
+		for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
+			mtspr(SPRN_MD_CTR, ctr | (i << 8));
+			mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
+			mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+			mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
+			addr += LARGE_PAGE_SIZE_8M;
+			mem -= LARGE_PAGE_SIZE_8M;
+		}
 	}
-#endif
 }
 
 static void __init mmu_mapin_immr(void)
@@ -98,26 +94,36 @@ static void __init mmu_mapin_immr(void)
 		map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
 }
 
-static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
+static void mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
 {
 	modify_instruction_site(site, 0xffff, (unsigned long)__va(mapped) >> 16);
 }
 
-unsigned long __init mmu_mapin_ram(unsigned long top)
+static void mmu_patch_addis(s32 *site, long simm)
+{
+	unsigned int instr = *(unsigned int *)patch_site_addr(site);
+
+	instr &= 0xffff0000;
+	instr |= ((unsigned long)simm) >> 16;
+	patch_instruction_site(site, instr);
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
 	unsigned long mapped;
 
 	if (__map_without_ltlbs) {
 		mapped = 0;
 		mmu_mapin_immr();
-#ifndef CONFIG_PIN_TLB_IMMR
-		patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
-#endif
-#ifndef CONFIG_PIN_TLB_TEXT
-		mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
-#endif
+		if (!IS_ENABLED(CONFIG_PIN_TLB_IMMR))
+			patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
+		if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+			mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
 	} else {
 		mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
+		if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+			mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
+					    _ALIGN(__pa(_einittext), 8 << 20));
 	}
 
 	mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
@@ -138,6 +144,26 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 	return mapped;
 }
 
+void mmu_mark_initmem_nx(void)
+{
+	if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
+		mmu_patch_addis(&patch__itlbmiss_linmem_top8,
+				-((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
+	if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+		mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mmu_mark_rodata_ro(void)
+{
+	if (CONFIG_DATA_SHIFT < 23)
+		mmu_patch_addis(&patch__dtlbmiss_romem_top8,
+				-__pa(((unsigned long)_sinittext) &
+				      ~(LARGE_PAGE_SIZE_8M - 1)));
+	mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
+}
+#endif
+
 void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				       phys_addr_t first_memblock_size)
 {
@@ -146,8 +172,8 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	 */
 	BUG_ON(first_memblock_base != 0);
 
-	/* 8xx can only access 24MB at the moment */
-	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
+	/* 8xx can only access 32MB at the moment */
+	memblock_set_current_limit(min_t(u64, first_memblock_size, 0x02000000));
 }
 
 /*
@@ -162,14 +188,11 @@ void set_context(unsigned long id, pgd_t *pgd)
 {
 	s16 offset = (s16)(__pa(swapper_pg_dir));
 
-#ifdef CONFIG_BDI_SWITCH
-	pgd_t **ptr = *(pgd_t ***)(KERNELBASE + 0xf0);
-
 	/* Context switch the PTE pointer for the Abatron BDI2000.
 	 * The PGDIR is passed as second argument.
 	 */
-	*(ptr + 1) = pgd;
-#endif
+	if (IS_ENABLED(CONFIG_BDI_SWITCH))
+		abatron_pteptrs[1] = pgd;
 
 	/* Register M_TWB will contain base address of level 1 table minus the
 	 * lower part of the kernel PGDIR base address, so that all accesses to
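The 8xx_mmu.c and set_context() hunks above trade #ifdef blocks for IS_ENABLED(CONFIG_...), so the disabled branch is still parsed and type-checked before the optimizer discards it. A minimal sketch of the pattern outside the kernel tree (this IS_ENABLED is a crude stand-in for the real macro in include/linux/kconfig.h, which works on CONFIG_* symbols that are defined to 1 or absent):

#define IS_ENABLED(option) (option)	/* stand-in, not the kernel definition */
#define CONFIG_PIN_TLB_DATA 1		/* stand-in for a Kconfig symbol */

static void init_hw_sketch(void)
{
	if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) {
		/* this block always compiles; with the option off it
		 * folds to a constant-false branch and is dropped */
	}
}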
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index f965fc33a8b7..d52ec118e09d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -45,13 +45,10 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
 obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o
 obj-$(CONFIG_SPAPR_TCE_IOMMU)	+= mmu_context_iommu.o
-obj-$(CONFIG_PPC_PTDUMP)	+= dump_linuxpagetables.o
-ifdef CONFIG_PPC_PTDUMP
-obj-$(CONFIG_4xx)		+= dump_linuxpagetables-generic.o
-obj-$(CONFIG_PPC_8xx)		+= dump_linuxpagetables-8xx.o
-obj-$(CONFIG_PPC_BOOK3E_MMU)	+= dump_linuxpagetables-generic.o
-obj-$(CONFIG_PPC_BOOK3S_32)	+= dump_linuxpagetables-generic.o dump_bats.o dump_sr.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= dump_linuxpagetables-book3s64.o
-endif
-obj-$(CONFIG_PPC_HTDUMP)	+= dump_hashpagetable.o
+obj-$(CONFIG_PPC_PTDUMP)	+= ptdump/
 obj-$(CONFIG_PPC_MEM_KEYS)	+= pkeys.o
+
+# Disable kcov instrumentation on sensitive code
+# This is necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_tlb_nohash.o := n
+KCOV_INSTRUMENT_fsl_booke_mmu.o := n
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index e955539686a4..b5d2658c26af 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -30,6 +30,7 @@
 #include <linux/types.h>
 #include <linux/highmem.h>
 #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/export.h>
 
 #include <asm/tlbflush.h>
@@ -151,8 +152,8 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
  */
-void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
 	struct page *page;
 	struct ppc_vm_region *c;
@@ -253,7 +254,7 @@ void *__dma_nommu_alloc_coherent(struct device *dev, size_t size,
 /*
  * free a page as defined by the above mapping.
  */
-void __dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
 	struct ppc_vm_region *c;
@@ -313,7 +314,7 @@ void *__dma_nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
 /*
  * make an area consistent.
  */
-void __dma_sync(void *vaddr, size_t size, int direction)
+static void __dma_sync(void *vaddr, size_t size, int direction)
 {
 	unsigned long start = (unsigned long)vaddr;
 	unsigned long end = start + size;
@@ -339,7 +340,6 @@ void __dma_sync(void *vaddr, size_t size, int direction)
 		break;
 	}
 }
-EXPORT_SYMBOL(__dma_sync);
 
 #ifdef CONFIG_HIGHMEM
 /*
@@ -386,28 +386,42 @@ static inline void __dma_sync_page_highmem(struct page *page,
 * __dma_sync_page makes memory consistent. identical to __dma_sync, but
 * takes a struct page instead of a virtual address
 */
-void __dma_sync_page(struct page *page, unsigned long offset,
-	size_t size, int direction)
+static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
 {
+	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+	unsigned offset = paddr & ~PAGE_MASK;
+
 #ifdef CONFIG_HIGHMEM
-	__dma_sync_page_highmem(page, offset, size, direction);
+	__dma_sync_page_highmem(page, offset, size, dir);
 #else
 	unsigned long start = (unsigned long)page_address(page) + offset;
-	__dma_sync((void *)start, size, direction);
+	__dma_sync((void *)start, size, dir);
 #endif
 }
-EXPORT_SYMBOL(__dma_sync_page);
+
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_sync_page(paddr, size, dir);
+}
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	__dma_sync_page(paddr, size, dir);
+}
 
 /*
- * Return the PFN for a given cpu virtual address returned by
- * __dma_nommu_alloc_coherent. This is used by dma_mmap_coherent()
+ * Return the PFN for a given cpu virtual address returned by arch_dma_alloc.
  */
-unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
+long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
+		dma_addr_t dma_addr)
 {
 	/* This should always be populated, so we don't test every
 	 * level. If that fails, we'll have a nice crash which
 	 * will be as good as a BUG_ON()
 	 */
+	unsigned long cpu_addr = (unsigned long)vaddr;
 	pgd_t *pgd = pgd_offset_k(cpu_addr);
 	pud_t *pud = pud_offset(pgd, cpu_addr);
 	pmd_t *pmd = pmd_offset(pud, cpu_addr);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 080d49b26c3a..210cbc1faf63 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -221,7 +221,7 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun)
 #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
 #endif
 
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
 	return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
 }
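dma-noncoherent.c now plugs into the generic kernel DMA framework instead of exporting its own helpers: arch_dma_alloc()/arch_dma_free() serve coherent allocations, and arch_sync_dma_for_{device,cpu}() do streaming cache maintenance. A hedged sketch of how a generic direct-mapping layer might drive the sync hook (the caller shape is a simplified assumption, not the actual kernel call chain):

/* sketch: generic mapping code invoking the arch hook for a
 * non-cache-coherent device before handing the buffer to hardware */
dma_addr_t map_page_sketch(struct device *dev, struct page *page,
			   unsigned long offset, size_t size,
			   enum dma_data_direction dir)
{
	phys_addr_t paddr = page_to_phys(page) + offset;

	arch_sync_dma_for_device(dev, paddr, size, dir); /* flush/invalidate */
	return phys_to_dma(dev, paddr);
}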
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 1e2df3e9f9ea..1f13494efb2b 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -47,14 +47,13 @@ mmu_hash_lock:
  * Returns to the caller if the access is illegal or there is no
  * mapping for the address.  Otherwise it places an appropriate PTE
  * in the hash table and returns from the exception.
- * Uses r0, r3 - r8, r10, ctr, lr.
+ * Uses r0, r3 - r6, r8, r10, ctr, lr.
  */
 	.text
 _GLOBAL(hash_page)
-	tophys(r7,0)			/* gets -KERNELBASE into r7 */
 #ifdef CONFIG_SMP
-	addis	r8,r7,mmu_hash_lock@h
-	ori	r8,r8,mmu_hash_lock@l
+	lis	r8, (mmu_hash_lock - PAGE_OFFSET)@h
+	ori	r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l
 	lis	r0,0x0fff
 	b	10f
 11:	lwz	r6,0(r8)
@@ -70,14 +69,13 @@ _GLOBAL(hash_page)
 	/* Get PTE (linux-style) and check access */
 	lis	r0,KERNELBASE@h		/* check if kernel address */
 	cmplw	0,r4,r0
-	mfspr	r8,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
 	ori	r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
-	lwz	r5,PGDIR(r8)		/* virt page-table root */
+	mfspr	r5, SPRN_SPRG_PGDIR	/* virt page-table root */
 	blt+	112f			/* assume user more likely */
 	lis	r5,swapper_pg_dir@ha	/* if kernel address, use */
 	addi	r5,r5,swapper_pg_dir@l	/* kernel page table */
 	rlwimi	r3,r9,32-12,29,29	/* MSR_PR -> _PAGE_USER */
-112:	add	r5,r5,r7		/* convert to phys addr */
+112:	tophys(r5, r5)
 #ifndef CONFIG_PTE_64BIT
 	rlwimi	r5,r4,12,20,29		/* insert top 10 bits of address */
 	lwz	r8,0(r5)		/* get pmd entry */
@@ -144,25 +142,24 @@ retry:
 
 #ifdef CONFIG_SMP
 	eieio
-	addis	r8,r7,mmu_hash_lock@ha
+	lis	r8, (mmu_hash_lock - PAGE_OFFSET)@ha
 	li	r0,0
-	stw	r0,mmu_hash_lock@l(r8)
+	stw	r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
 #endif
 
 	/* Return from the exception */
 	lwz	r5,_CTR(r11)
 	mtctr	r5
 	lwz	r0,GPR0(r11)
-	lwz	r7,GPR7(r11)
 	lwz	r8,GPR8(r11)
 	b	fast_exception_return
 
 #ifdef CONFIG_SMP
 hash_page_out:
 	eieio
-	addis	r8,r7,mmu_hash_lock@ha
+	lis	r8, (mmu_hash_lock - PAGE_OFFSET)@ha
 	li	r0,0
-	stw	r0,mmu_hash_lock@l(r8)
+	stw	r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
 	blr
 #endif /* CONFIG_SMP */
@@ -186,8 +183,7 @@ _GLOBAL(add_hash_page)
 	add	r3,r3,r0		/* note create_hpte trims to 24 bits */
 
 #ifdef CONFIG_SMP
-	CURRENT_THREAD_INFO(r8, r1)	/* use cpu number to make tag */
-	lwz	r8,TI_CPU(r8)		/* to go in mmu_hash_lock */
+	lwz	r8,TASK_CPU(r2)		/* to go in mmu_hash_lock */
 	oris	r8,r8,12
 #endif /* CONFIG_SMP */
@@ -208,11 +204,9 @@ _GLOBAL(add_hash_page)
 	SYNC_601
 	isync
 
-	tophys(r7,0)
-
 #ifdef CONFIG_SMP
-	addis	r6,r7,mmu_hash_lock@ha
-	addi	r6,r6,mmu_hash_lock@l
+	lis	r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+	addi	r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
 10:	lwarx	r0,0,r6			/* take the mmu_hash_lock */
 	cmpi	0,r0,0
 	bne-	11f
@@ -257,8 +251,8 @@ _GLOBAL(add_hash_page)
 9:
 #ifdef CONFIG_SMP
-	addis	r6,r7,mmu_hash_lock@ha
-	addi	r6,r6,mmu_hash_lock@l
+	lis	r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+	addi	r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
 	eieio
 	li	r0,0
 	stw	r0,0(r6)		/* clear mmu_hash_lock */
@@ -278,10 +272,8 @@ _GLOBAL(add_hash_page)
  * It is designed to be called with the MMU either on or off.
  * r3 contains the VSID, r4 contains the virtual address,
  * r5 contains the linux PTE, r6 contains the old value of the
- * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
- * offset to be added to addresses (0 if the MMU is on,
- * -KERNELBASE if it is off).  r10 contains the upper half of
- * the PTE if CONFIG_PTE_64BIT.
+ * linux PTE (before setting _PAGE_HASHPTE). r10 contains the
+ * upper half of the PTE if CONFIG_PTE_64BIT.
  * On SMP, the caller should have the mmu_hash_lock held.
  * We assume that the caller has (or will) set the _PAGE_HASHPTE
  * bit in the linux PTE in memory.  The value passed in r6 should
@@ -342,7 +334,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	patch_site	1f, patch__hash_page_A1
 	patch_site	2f, patch__hash_page_A2
 	/* Get the address of the primary PTE group in the hash table (r3) */
-0:	addis	r0,r7,Hash_base@h	/* base address of hash table */
+0:	lis	r0, (Hash_base - PAGE_OFFSET)@h	/* base address of hash table */
 1:	rlwimi	r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
 2:	rlwinm	r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
 	xor	r3,r3,r0		/* make primary hash */
@@ -356,10 +348,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	beq+	10f			/* no PTE: go look for an empty slot */
 	tlbie	r4
 
-	addis	r4,r7,htab_hash_searches@ha
-	lwz	r6,htab_hash_searches@l(r4)
+	lis	r4, (htab_hash_searches - PAGE_OFFSET)@ha
+	lwz	r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
 	addi	r6,r6,1			/* count how many searches we do */
-	stw	r6,htab_hash_searches@l(r4)
+	stw	r6, (htab_hash_searches - PAGE_OFFSET)@l(r4)
 
 	/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
 	mtctr	r0
@@ -391,10 +383,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	beq+	found_empty
 
 	/* update counter of times that the primary PTEG is full */
-	addis	r4,r7,primary_pteg_full@ha
-	lwz	r6,primary_pteg_full@l(r4)
+	lis	r4, (primary_pteg_full - PAGE_OFFSET)@ha
+	lwz	r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
 	addi	r6,r6,1
-	stw	r6,primary_pteg_full@l(r4)
+	stw	r6, (primary_pteg_full - PAGE_OFFSET)@l(r4)
 
 	patch_site	0f, patch__hash_page_C
 	/* Search the secondary PTEG for an empty slot */
@@ -428,8 +420,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
 	 * lockup here but that shouldn't happen
 	 */
 
-1:	addis	r4,r7,next_slot@ha	/* get next evict slot */
-	lwz	r6,next_slot@l(r4)
+1:	lis	r4, (next_slot - PAGE_OFFSET)@ha	/* get next evict slot */
+	lwz	r6, (next_slot - PAGE_OFFSET)@l(r4)
 	addi	r6,r6,HPTE_SIZE		/* search for candidate */
 	andi.	r6,r6,7*HPTE_SIZE
 	stw	r6,next_slot@l(r4)
@@ -501,8 +493,6 @@ htab_hash_searches:
  * We assume that there is a hash table in use (Hash != 0).
  */
 _GLOBAL(flush_hash_pages)
-	tophys(r7,0)
-
 	/*
 	 * We disable interrupts here, even on UP, because we want
 	 * the _PAGE_HASHPTE bit to be a reliable indication of
@@ -547,11 +537,9 @@ _GLOBAL(flush_hash_pages)
 	SET_V(r11)			/* set V (valid) bit */
 
 #ifdef CONFIG_SMP
-	addis	r9,r7,mmu_hash_lock@ha
-	addi	r9,r9,mmu_hash_lock@l
-	CURRENT_THREAD_INFO(r8, r1)
-	add	r8,r8,r7
-	lwz	r8,TI_CPU(r8)
+	lis	r9, (mmu_hash_lock - PAGE_OFFSET)@ha
+	addi	r9, r9, (mmu_hash_lock - PAGE_OFFSET)@l
+	lwz	r8,TASK_CPU(r2)
 	oris	r8,r8,9
 10:	lwarx	r0,0,r9
 	cmpi	0,r0,0
@@ -584,7 +572,7 @@ _GLOBAL(flush_hash_pages)
 	patch_site	1f, patch__flush_hash_A1
 	patch_site	2f, patch__flush_hash_A2
 	/* Get the address of the primary PTE group in the hash table (r3) */
-0:	addis	r8,r7,Hash_base@h	/* base address of hash table */
+0:	lis	r8, (Hash_base - PAGE_OFFSET)@h	/* base address of hash table */
 1:	rlwimi	r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
 2:	rlwinm	r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
 	xor	r8,r0,r8		/* make primary hash */
@@ -646,8 +634,7 @@ EXPORT_SYMBOL(flush_hash_pages)
  */
 _GLOBAL(_tlbie)
 #ifdef CONFIG_SMP
-	CURRENT_THREAD_INFO(r8, r1)
-	lwz	r8,TI_CPU(r8)
+	lwz	r8,TASK_CPU(r2)
 	oris	r8,r8,11
 	mfmsr	r10
 	SYNC
@@ -684,8 +671,7 @@ _GLOBAL(_tlbie)
  */
 _GLOBAL(_tlbia)
 #if defined(CONFIG_SMP)
-	CURRENT_THREAD_INFO(r8, r1)
-	lwz	r8,TI_CPU(r8)
+	lwz	r8,TASK_CPU(r2)
 	oris	r8,r8,10
 	mfmsr	r10
 	SYNC
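Most of the hash_low_32.S churn replaces "addis rX,r7,sym@h" (with r7 holding a runtime -KERNELBASE offset) by "lis rX, (sym - PAGE_OFFSET)@h": because the kernel is linked at PAGE_OFFSET, the physical address of a static symbol is a link-time constant, which frees up r7 across these paths. A C rendering of the idea (the PAGE_OFFSET value here is illustrative):

#define PAGE_OFFSET	0xc0000000UL

extern char mmu_hash_lock[];		/* symbol inside the kernel image */

static inline unsigned long phys_of(const void *virt)
{
	/* constant-folded at build/link time, no scratch register needed */
	return (unsigned long)virt - PAGE_OFFSET;
}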
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0cc7fbc3bd1c..3d4b2399192f 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -908,9 +908,9 @@ static void __init htab_initialize(void)
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	if (debug_pagealloc_enabled()) {
 		linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
-		linear_map_hash_slots = __va(memblock_alloc_base(
-				linear_map_hash_count, 1, ppc64_rma_size));
-		memset(linear_map_hash_slots, 0, linear_map_hash_count);
+		linear_map_hash_slots = memblock_alloc_try_nid(
+				linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
+				ppc64_rma_size,	NUMA_NO_NODE);
 	}
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
@@ -1889,12 +1889,12 @@ static int hpt_order_set(void *data, u64 val)
 	return mmu_hash_ops.resize_hpt(val);
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
 
 static int __init hash64_debugfs(void)
 {
-	if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root,
-				 NULL, &fops_hpt_order)) {
+	if (!debugfs_create_file_unsafe("hpt_order", 0600, powerpc_debugfs_root,
+					NULL, &fops_hpt_order)) {
 		pr_err("lpar: unable to create hpt_order debugsfs file\n");
 	}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 2e6a8f9345d3..b0d9209d9a86 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -26,7 +26,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	real_pte_t rpte;
 	unsigned long vpn;
 	unsigned long old_pte, new_pte;
-	unsigned long rflags, pa, sz;
+	unsigned long rflags, pa;
 	long slot, offset;
 
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
@@ -73,7 +73,6 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		offset = PTRS_PER_PMD;
 	rpte = __real_pte(__pte(old_pte), ptep, offset);
 
-	sz = ((1UL) << shift);
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		/* No CPU has hugepages but lacks no execute, so we
 		 * don't need to worry about that case */
@@ -121,3 +120,28 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
+
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+				  unsigned long addr, pte_t *ptep)
+{
+	unsigned long pte_val;
+	/*
+	 * Clear the _PAGE_PRESENT so that no hardware parallel update is
+	 * possible. Also keep the pte_present true so that we don't take
+	 * wrong fault.
+	 */
+	pte_val = pte_update(vma->vm_mm, addr, ptep,
+			     _PAGE_PRESENT, _PAGE_INVALID, 1);
+
+	return __pte(pte_val);
+}
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+				  pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+
+	if (radix_enabled())
+		return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
+							   old_pte, pte);
+	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
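The two functions added to hugetlbpage-hash64.c implement a start/commit protocol for protection changes: invalidate first so hardware cannot update the PTE in parallel, then publish the new value. A sketch of the caller side under assumed names (the real caller is the generic mprotect path; pte_modify() is the standard helper):

static void change_huge_prot_sketch(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pgprot_t newprot)
{
	/* 1. atomically clear _PAGE_PRESENT (pte_present() stays true
	 *    via _PAGE_INVALID) so no parallel hardware update happens */
	pte_t old_pte = huge_ptep_modify_prot_start(vma, addr, ptep);

	/* 2. compute the new value from the snapshot */
	pte_t new_pte = pte_modify(old_pte, newprot);

	/* 3. publish; the radix variant flushes first to avoid NMMU hangs */
	huge_ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
}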
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 2486bee0f93e..cab06331c0c0 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
+#include <linux/security.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
@@ -73,7 +74,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	if (addr) {
 		addr = ALIGN(addr, huge_page_size(h));
 		vma = find_vma(mm, addr);
-		if (high_limit - len >= addr &&
+		if (high_limit - len >= addr && addr >= mmap_min_addr &&
 		    (!vma || addr + len <= vm_start_gap(vma)))
 			return addr;
 	}
@@ -83,10 +84,27 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	 */
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
-	info.low_limit = PAGE_SIZE;
+	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
 	info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
 
 	return vm_unmapped_area(&info);
 }
+
+void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep,
+					 pte_t old_pte, pte_t pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	/*
+	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
+	 * we set the new value.
+	 */
+	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+	    (atomic_read(&mm->context.copros) > 0))
+		radix__flush_hugetlb_page(vma, addr);
+
+	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 3e59e5d64b01..41a3513cadc9 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -108,12 +108,8 @@ static void __init MMU_setup(void)
 		__map_without_bats = 1;
 		__map_without_ltlbs = 1;
 	}
-#ifdef CONFIG_STRICT_KERNEL_RWX
-	if (rodata_enabled) {
-		__map_without_bats = 1;
+	if (strict_kernel_rwx_enabled() && !IS_ENABLED(CONFIG_PPC_8xx))
 		__map_without_ltlbs = 1;
-	}
-#endif
 }
 
 /*
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a5091c034747..a4c155af1597 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -274,7 +274,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
 
 	for (; start < end; start += page_size) {
 		unsigned long nr_pages, addr;
-		struct page *section_base;
 		struct page *page;
 
 		/*
@@ -290,7 +289,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
 			continue;
 
 		page = pfn_to_page(addr >> PAGE_SHIFT);
-		section_base = pfn_to_page(vmemmap_section_start(start));
 		nr_pages = 1 << page_order;
 		base_pfn = PHYS_PFN(addr);
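The mmap_min_addr changes in hugetlbpage-radix.c (and in slice.c further down) enforce one rule: neither a caller-supplied hint nor the top-down search may yield a mapping below vm.mmap_min_addr. A condensed restatement of the pattern with the alignment details stripped out:

static unsigned long get_area_sketch(struct mm_struct *mm, unsigned long addr,
				     unsigned long len, unsigned long high_limit)
{
	struct vm_unmapped_area_info info;

	if (addr) {
		struct vm_area_struct *vma = find_vma(mm, addr);

		/* hint must sit above the floor and miss existing VMAs */
		if (high_limit - len >= addr && addr >= mmap_min_addr &&
		    (!vma || addr + len <= vm_start_gap(vma)))
			return addr;
	}

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = max(PAGE_SIZE, mmap_min_addr);	/* the fix */
	info.high_limit = high_limit;
	info.align_mask = 0;
	info.align_offset = 0;
	return vm_unmapped_area(&info);
}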
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 33cc6f676fa6..f6787f90e158 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -69,22 +69,14 @@ pte_t *kmap_pte;
 EXPORT_SYMBOL(kmap_pte);
 pgprot_t kmap_prot;
 EXPORT_SYMBOL(kmap_prot);
-#define TOP_ZONE ZONE_HIGHMEM
 
 static inline pte_t *virt_to_kpte(unsigned long vaddr)
 {
 	return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
 			vaddr), vaddr), vaddr);
 }
-#else
-#define TOP_ZONE ZONE_NORMAL
 #endif
 
-int page_is_ram(unsigned long pfn)
-{
-	return memblock_is_memory(__pfn_to_phys(pfn));
-}
-
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
 {
@@ -176,34 +168,6 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size,
 #endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-/*
- * walk_memory_resource() needs to make sure there is no holes in a given
- * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
- * Instead it maintains it in memblock.memory structures.  Walk through the
- * memory regions, find holes and callback for contiguous regions.
- */
-int
-walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
-		void *arg, int (*func)(unsigned long, unsigned long, void *))
-{
-	struct memblock_region *reg;
-	unsigned long end_pfn = start_pfn + nr_pages;
-	unsigned long tstart, tend;
-	int ret = -1;
-
-	for_each_memblock(memory, reg) {
-		tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
-		tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
-		if (tstart >= tend)
-			continue;
-		ret = (*func)(tstart, tend - tstart, arg);
-		if (ret)
-			break;
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(walk_system_ram_range);
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 void __init mem_topology_setup(void)
 {
@@ -262,25 +226,6 @@ static int __init mark_nonram_nosave(void)
 static unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 /*
- * Find the least restrictive zone that is entirely below the
- * specified pfn limit.  Returns < 0 if no suitable zone is found.
- *
- * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
- * systems -- the DMA limit can be higher than any possible real pfn.
- */
-int dma_pfn_limit_to_zone(u64 pfn_limit)
-{
-	int i;
-
-	for (i = TOP_ZONE; i >= 0; i--) {
-		if (max_zone_pfns[i] <= pfn_limit)
-			return i;
-	}
-
-	return -EPERM;
-}
-
-/*
  * paging_init() sets up the page tables - in fact we've already done this.
  */
 void __init paging_init(void)
@@ -585,3 +530,9 @@ int devmem_is_allowed(unsigned long pfn)
 	return 0;
 }
 #endif /* CONFIG_STRICT_DEVMEM */
+
+/*
+ * This is defined in kernel/resource.c but only powerpc needs to export it, for
+ * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed.
+ */
+EXPORT_SYMBOL_GPL(walk_system_ram_range);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index a712a650a8b6..e7a9c4f6bfca 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -21,6 +21,7 @@
 #include <linux/sizes.h>
 #include <asm/mmu_context.h>
 #include <asm/pte-walk.h>
+#include <linux/mm_inline.h>
 
 static DEFINE_MUTEX(mem_list_mutex);
 
@@ -34,8 +35,18 @@ struct mm_iommu_table_group_mem_t {
 	atomic64_t mapped;
 	unsigned int pageshift;
 	u64 ua;			/* userspace address */
-	u64 entries;		/* number of entries in hpas[] */
-	u64 *hpas;		/* vmalloc'ed */
+	u64 entries;		/* number of entries in hpas/hpages[] */
+	/*
+	 * in mm_iommu_get we temporarily use this to store
+	 * struct page address.
+	 *
+	 * We need to convert ua to hpa in real mode. Make it
+	 * simpler by storing physical address.
+	 */
+	union {
+		struct page **hpages;	/* vmalloc'ed */
+		phys_addr_t *hpas;
+	};
 #define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
 	u64 dev_hpa;		/* Device memory base address */
 };
@@ -80,64 +91,13 @@ bool mm_iommu_preregistered(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
 
-/*
- * Taken from alloc_migrate_target with changes to remove CMA allocations
- */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
-{
-	gfp_t gfp_mask = GFP_USER;
-	struct page *new_page;
-
-	if (PageCompound(page))
-		return NULL;
-
-	if (PageHighMem(page))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	/*
-	 * We don't want the allocation to force an OOM if possibe
-	 */
-	new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
-	return new_page;
-}
-
-static int mm_iommu_move_page_from_cma(struct page *page)
-{
-	int ret = 0;
-	LIST_HEAD(cma_migrate_pages);
-
-	/* Ignore huge pages for now */
-	if (PageCompound(page))
-		return -EBUSY;
-
-	lru_add_drain();
-	ret = isolate_lru_page(page);
-	if (ret)
-		return ret;
-
-	list_add(&page->lru, &cma_migrate_pages);
-	put_page(page); /* Drop the gup reference */
-
-	ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
-				NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
-	if (ret) {
-		if (!list_empty(&cma_migrate_pages))
-			putback_movable_pages(&cma_migrate_pages);
-	}
-
-	return 0;
-}
-
 static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
-		unsigned long entries, unsigned long dev_hpa,
-		struct mm_iommu_table_group_mem_t **pmem)
+			      unsigned long entries, unsigned long dev_hpa,
+			      struct mm_iommu_table_group_mem_t **pmem)
 {
 	struct mm_iommu_table_group_mem_t *mem;
-	long i, j, ret = 0, locked_entries = 0;
+	long i, ret, locked_entries = 0;
 	unsigned int pageshift;
-	unsigned long flags;
-	unsigned long cur_ua;
-	struct page *page = NULL;
 
 	mutex_lock(&mem_list_mutex);
 
@@ -187,62 +147,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 		goto unlock_exit;
 	}
 
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
+	up_read(&mm->mmap_sem);
+	if (ret != entries) {
+		/* free the reference taken */
+		for (i = 0; i < ret; i++)
+			put_page(mem->hpages[i]);
+
+		vfree(mem->hpas);
+		kfree(mem);
+		ret = -EFAULT;
+		goto unlock_exit;
+	}
+
+	pageshift = PAGE_SHIFT;
 	for (i = 0; i < entries; ++i) {
-		cur_ua = ua + (i << PAGE_SHIFT);
-		if (1 != get_user_pages_fast(cur_ua,
-					1/* pages */, 1/* iswrite */, &page)) {
-			ret = -EFAULT;
-			for (j = 0; j < i; ++j)
-				put_page(pfn_to_page(mem->hpas[j] >>
-						PAGE_SHIFT));
-			vfree(mem->hpas);
-			kfree(mem);
-			goto unlock_exit;
-		}
+		struct page *page = mem->hpages[i];
+
 		/*
-		 * If we get a page from the CMA zone, since we are going to
-		 * be pinning these entries, we might as well move them out
-		 * of the CMA zone if possible. NOTE: faulting in + migration
-		 * can be expensive. Batching can be considered later
+		 * Allow to use larger than 64k IOMMU pages. Only do that
+		 * if we are backed by hugetlb.
 		 */
-		if (is_migrate_cma_page(page)) {
-			if (mm_iommu_move_page_from_cma(page))
-				goto populate;
-			if (1 != get_user_pages_fast(cur_ua,
-						1/* pages */, 1/* iswrite */,
-						&page)) {
-				ret = -EFAULT;
-				for (j = 0; j < i; ++j)
-					put_page(pfn_to_page(mem->hpas[j] >>
-								PAGE_SHIFT));
-				vfree(mem->hpas);
-				kfree(mem);
-				goto unlock_exit;
-			}
-		}
-populate:
-		pageshift = PAGE_SHIFT;
-		if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
-			pte_t *pte;
+		if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
 			struct page *head = compound_head(page);
-			unsigned int compshift = compound_order(head);
-			unsigned int pteshift;
-
-			local_irq_save(flags); /* disables as well */
-			pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift);
-
-			/* Double check it is still the same pinned page */
-			if (pte && pte_page(*pte) == head &&
-			    pteshift == compshift + PAGE_SHIFT)
-				pageshift = max_t(unsigned int, pteshift,
-						PAGE_SHIFT);
-			local_irq_restore(flags);
+
+			pageshift = compound_order(head) + PAGE_SHIFT;
 		}
 		mem->pageshift = min(mem->pageshift, pageshift);
+		/*
+		 * We don't need struct page reference any more, switch
+		 * to physical address.
+		 */
 		mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
 	}
 
 good_exit:
+	ret = 0;
 	atomic64_set(&mem->mapped, 1);
 	mem->used = 1;
 	mem->ua = ua;
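The mmu_context_iommu.c rewrite above replaces a page-at-a-time get_user_pages_fast() loop (plus manual CMA migration) with one batched get_user_pages_longterm() call, which handles migrating pages out of CMA internally; the error path must unpin whatever was partially pinned. Isolated sketch of just that pattern (the helper name is invented):

static long pin_user_range_sketch(struct mm_struct *mm, unsigned long ua,
				  unsigned long entries, struct page **pages)
{
	long i, ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, pages, NULL);
	up_read(&mm->mmap_sem);

	if (ret != entries) {
		/* partial pin: drop the references we did take */
		for (i = 0; i < ret; i++)
			put_page(pages[i]);
		return -EFAULT;
	}
	return 0;
}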
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c4a717da65eb..74ff61dabcb1 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -130,7 +130,7 @@ extern void wii_memory_fixups(void);
  */
 #ifdef CONFIG_PPC32
 extern void MMU_init_hw(void);
-extern unsigned long mmu_mapin_ram(unsigned long top);
+unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
 #endif
 
 #ifdef CONFIG_PPC_FSL_BOOK3E
@@ -165,3 +165,11 @@ unsigned long p_block_mapped(phys_addr_t pa);
 static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; }
 static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; }
 #endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx)
+void mmu_mark_initmem_nx(void);
+void mmu_mark_rodata_ro(void);
+#else
+static inline void mmu_mark_initmem_nx(void) { }
+static inline void mmu_mark_rodata_ro(void) { }
+#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 87f0dd004295..ac49e4158e50 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -84,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void)
 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
 	/* cpumask_of_node() will now work */
-	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+	dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 static int __init fake_numa_create_new_node(unsigned long end_pfn,
@@ -215,7 +215,7 @@ static void initialize_distance_lookup_table(int nid,
  */
 static int associativity_to_nid(const __be32 *associativity)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	if (min_common_depth == -1)
 		goto out;
@@ -225,7 +225,7 @@ static int associativity_to_nid(const __be32 *associativity)
 
 	/* POWER4 LPAR uses 0xffff as invalid node */
 	if (nid == 0xffff || nid >= MAX_NUMNODES)
-		nid = -1;
+		nid = NUMA_NO_NODE;
 
 	if (nid > 0 &&
 		of_read_number(associativity, 1) >= distance_ref_points_depth) {
@@ -244,7 +244,7 @@ out:
  */
 static int of_node_to_nid_single(struct device_node *device)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 	const __be32 *tmp;
 
 	tmp = of_get_associativity(device);
@@ -256,7 +256,7 @@ static int of_node_to_nid_single(struct device_node *device)
 /* Walk the device tree upwards, looking for an associativity id */
 int of_node_to_nid(struct device_node *device)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	of_node_get(device);
 	while (device) {
@@ -454,7 +454,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
  */
 static int numa_setup_cpu(unsigned long lcpu)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 	struct device_node *cpu;
 
 	/*
@@ -930,7 +930,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
 {
 	struct drmem_lmb *lmb;
 	unsigned long lmb_size;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	lmb_size = drmem_lmb_size();
 
@@ -960,7 +960,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
 static int hot_add_node_scn_to_nid(unsigned long scn_addr)
 {
 	struct device_node *memory;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	for_each_node_by_type(memory, "memory") {
 		unsigned long start, size;
@@ -1460,13 +1460,6 @@ static void reset_topology_timer(void)
 
 #ifdef CONFIG_SMP
 
-static void stage_topology_update(int core_id)
-{
-	cpumask_or(&cpu_associativity_changes_mask,
-		&cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
-	reset_topology_timer();
-}
-
 static int dt_update_callback(struct notifier_block *nb,
 				unsigned long action, void *data)
 {
@@ -1479,7 +1472,7 @@ static int dt_update_callback(struct notifier_block *nb,
 		    !of_prop_cmp(update->prop->name, "ibm,associativity")) {
 			u32 core_id;
 			of_property_read_u32(update->dn, "reg", &core_id);
-			stage_topology_update(core_id);
+			rc = dlpar_cpu_readd(core_id);
 			rc = NOTIFY_OK;
 		}
 		break;
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c
index e0ccf36714b2..53cbc7dc2df2 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -57,12 +57,8 @@ void vmemmap_remove_mapping(unsigned long start,
 
 static __ref void *early_alloc_pgtable(unsigned long size)
 {
-	void *pt;
-
-	pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
-	memset(pt, 0, size);
-
-	return pt;
+	return memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT,
+				      __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE);
 }
 
 /*
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f3c31f5e1026..92a3e4c39540 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -195,11 +195,8 @@ void __init mmu_partition_table_init(void)
 	unsigned long ptcr;
 
 	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
-	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
-						MEMBLOCK_ALLOC_ANYWHERE));
-
 	/* Initialize the Partition Table with no entries */
-	memset((void *)partition_tb, 0, patb_size);
+	partition_tb = memblock_alloc(patb_size, patb_size);
 
 	/*
 	 * update partition table control register,
@@ -400,3 +397,50 @@ void arch_report_meminfo(struct seq_file *m)
 		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
 }
 #endif /* CONFIG_PROC_FS */
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep)
+{
+	unsigned long pte_val;
+
+	/*
+	 * Clear the _PAGE_PRESENT so that no hardware parallel update is
+	 * possible. Also keep the pte_present true so that we don't take
+	 * wrong fault.
+	 */
+	pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
+
+	return __pte(pte_val);
+
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+	if (radix_enabled())
+		return radix__ptep_modify_prot_commit(vma, addr,
+						      ptep, old_pte, pte);
+	set_pte_at(vma->vm_mm, addr, ptep, pte);
+}
+
+/*
+ * For hash translation mode, we use the deposited table to store hash slot
+ * information and they are stored at PTRS_PER_PMD offset from related pmd
+ * location. Hence a pmd move requires deposit and withdraw.
+ *
+ * For radix translation with split pmd ptl, we store the deposited table in the
+ * pmd page. Hence if we have different pmd page we need to withdraw during pmd
+ * move.
+ *
+ * With hash we use deposited table always irrespective of anon or not.
+ * With radix we use deposited table only for anonymous mapping.
+ */
+int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+			   struct spinlock *old_pmd_ptl,
+			   struct vm_area_struct *vma)
+{
+	if (radix_enabled())
+		return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
+
+	return true;
+}
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 931156069a81..e377684ac6ad 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -51,26 +51,15 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
 static __ref void *early_alloc_pgtable(unsigned long size, int nid,
 			unsigned long region_start, unsigned long region_end)
 {
-	unsigned long pa = 0;
-	void *pt;
+	phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
+	phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
 
-	if (region_start || region_end) /* has region hint */
-		pa = memblock_alloc_range(size, size, region_start, region_end,
-						MEMBLOCK_NONE);
-	else if (nid != -1) /* has node hint */
-		pa = memblock_alloc_base_nid(size, size,
-						MEMBLOCK_ALLOC_ANYWHERE,
-						nid, MEMBLOCK_NONE);
+	if (region_start)
+		min_addr = region_start;
+	if (region_end)
+		max_addr = region_end;
 
-	if (!pa)
-		pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
-
-	BUG_ON(!pa);
-
-	pt = __va(pa);
-	memset(pt, 0, size);
-
-	return pt;
+	return memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);
 }
 
 static int early_map_kernel_page(unsigned long ea, unsigned long pa,
@@ -1063,3 +1052,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 	}
 	/* See ptesync comment in radix__set_pte_at */
 }
+
+void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep,
+				    pte_t old_pte, pte_t pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	/*
+	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
+	 * we set the new value. We need to do this only for radix, because hash
+	 * translation does flush when updating the linux pte.
+	 */
+	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+	    (atomic_read(&mm->context.copros) > 0))
+		radix__flush_tlb_page(vma, addr);
+
+	set_pte_at(mm, addr, ptep, pte);
+}
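The allocator hunks in pgtable-book3e.c, pgtable-book3s64.c and pgtable-radix.c (like the hash_utils_64.c one earlier) share a theme: memblock_alloc_try_nid() already returns zeroed memory at its mapped virtual address and honours range and node hints, so the open-coded __va() + memset() + fallback chains collapse to one call. Sketch of the consolidated shape:

static void *early_pgtable_alloc_sketch(unsigned long size, int nid,
					phys_addr_t region_start,
					phys_addr_t region_end)
{
	phys_addr_t min_addr = region_start ? region_start : MEMBLOCK_LOW_LIMIT;
	phys_addr_t max_addr = region_end ? region_end : MEMBLOCK_ALLOC_ANYWHERE;

	/* zeroed on success, NULL on failure; hints are best effort */
	return memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);
}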
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index ded71126ce4c..6e56a6240bfa 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -254,26 +254,20 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
 
 void __init mapin_ram(void)
 {
-	unsigned long s, top;
-
-#ifndef CONFIG_WII
-	top = total_lowmem;
-	s = mmu_mapin_ram(top);
-	__mapin_ram_chunk(s, top);
-#else
-	if (!wii_hole_size) {
-		s = mmu_mapin_ram(total_lowmem);
-		__mapin_ram_chunk(s, total_lowmem);
-	} else {
-		top = wii_hole_start;
-		s = mmu_mapin_ram(top);
-		__mapin_ram_chunk(s, top);
-
-		top = memblock_end_of_DRAM();
-		s = wii_mmu_mapin_mem2(top);
-		__mapin_ram_chunk(s, top);
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg) {
+		phys_addr_t base = reg->base;
+		phys_addr_t top = min(base + reg->size, total_lowmem);
+
+		if (base >= top)
+			continue;
+		base = mmu_mapin_ram(base, top);
+		if (IS_ENABLED(CONFIG_BDI_SWITCH))
+			__mapin_ram_chunk(reg->base, top);
+		else
+			__mapin_ram_chunk(base, top);
 	}
-#endif
 }
 
 /* Scan the real Linux page tables and return a PTE pointer for
@@ -359,7 +353,10 @@ void mark_initmem_nx(void)
 	unsigned long numpages = PFN_UP((unsigned long)_einittext) -
				 PFN_DOWN((unsigned long)_sinittext);
 
-	change_page_attr(page, numpages, PAGE_KERNEL);
+	if (v_block_mapped((unsigned long)_stext + 1))
+		mmu_mark_initmem_nx();
+	else
+		change_page_attr(page, numpages, PAGE_KERNEL);
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
@@ -368,6 +365,11 @@ void mark_rodata_ro(void)
 	struct page *page;
 	unsigned long numpages;
 
+	if (v_block_mapped((unsigned long)_sinittext)) {
+		mmu_mark_rodata_ro();
+		return;
+	}
+
 	page = virt_to_page(_stext);
 	numpages = PFN_UP((unsigned long)_etext) -
		   PFN_DOWN((unsigned long)_stext);
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 3f4193201ee7..6c8a60b1e31d 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -32,6 +32,7 @@
 #include <asm/mmu.h>
 #include <asm/machdep.h>
 #include <asm/code-patching.h>
+#include <asm/sections.h>
 
 #include "mmu_decl.h"
 
@@ -73,45 +74,171 @@ unsigned long p_block_mapped(phys_addr_t pa)
 	return 0;
 }
 
-unsigned long __init mmu_mapin_ram(unsigned long top)
+static int find_free_bat(void)
 {
-	unsigned long tot, bl, done;
-	unsigned long max_size = (256<<20);
+	int b;
+
+	if (cpu_has_feature(CPU_FTR_601)) {
+		for (b = 0; b < 4; b++) {
+			struct ppc_bat *bat = BATS[b];
+
+			if (!(bat[0].batl & 0x40))
+				return b;
+		}
+	} else {
+		int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+
+		for (b = 0; b < n; b++) {
+			struct ppc_bat *bat = BATS[b];
+
+			if (!(bat[1].batu & 3))
+				return b;
+		}
+	}
+	return -1;
+}
+
+static unsigned int block_size(unsigned long base, unsigned long top)
+{
+	unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
+	unsigned int base_shift = (fls(base) - 1) & 31;
+	unsigned int block_shift = (fls(top - base) - 1) & 31;
+
+	return min3(max_size, 1U << base_shift, 1U << block_shift);
+}
+
+/*
+ * Set up one of the IBAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ * Only for 603+ ...
+ */
+static void setibat(int index, unsigned long virt, phys_addr_t phys,
+		    unsigned int size, pgprot_t prot)
+{
+	unsigned int bl = (size >> 17) - 1;
+	int wimgxpp;
+	struct ppc_bat *bat = BATS[index];
+	unsigned long flags = pgprot_val(prot);
+
+	if (!cpu_has_feature(CPU_FTR_NEED_COHERENT))
+		flags &= ~_PAGE_COHERENT;
+
+	wimgxpp = (flags & _PAGE_COHERENT) | (_PAGE_EXEC ? BPP_RX : BPP_XX);
+	bat[0].batu = virt | (bl << 2) | 2;	/* Vs=1, Vp=0 */
+	bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+	if (flags & _PAGE_USER)
+		bat[0].batu |= 1;		/* Vp = 1 */
+}
+
+static void clearibat(int index)
+{
+	struct ppc_bat *bat = BATS[index];
+
+	bat[0].batu = 0;
+	bat[0].batl = 0;
+}
+
+static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	int idx;
+
+	while ((idx = find_free_bat()) != -1 && base != top) {
+		unsigned int size = block_size(base, top);
+
+		if (size < 128 << 10)
+			break;
+		setbat(idx, PAGE_OFFSET + base, base, size, PAGE_KERNEL_X);
+		base += size;
+	}
+
+	return base;
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+	int done;
+	unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
 
 	if (__map_without_bats) {
-		printk(KERN_DEBUG "RAM mapped without BATs\n");
-		return 0;
+		pr_debug("RAM mapped without BATs\n");
+		return base;
+	}
+
+	if (!strict_kernel_rwx_enabled() || base >= border || top <= border)
+		return __mmu_mapin_ram(base, top);
+
+	done = __mmu_mapin_ram(base, border);
+	if (done != border - base)
+		return done;
+
+	return done + __mmu_mapin_ram(border, top);
+}
+
+void mmu_mark_initmem_nx(void)
+{
+	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+	int i;
+	unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
+	unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
+	unsigned long size;
+
+	if (cpu_has_feature(CPU_FTR_601))
+		return;
+
+	for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
+		size = block_size(base, top);
+		setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+		base += size;
 	}
+	if (base < top) {
+		size = block_size(base, top);
+		size = max(size, 128UL << 10);
+		if ((top - base) > size) {
+			if (strict_kernel_rwx_enabled())
+				pr_warn("Kernel _etext not properly aligned\n");
+			size <<= 1;
+		}
+		setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+		base += size;
+	}
+	for (; i < nb; i++)
+		clearibat(i);
 
-	/* Set up BAT2 and if necessary BAT3 to cover RAM. */
+	update_bats();
 
-	/* Make sure we don't map a block larger than the
-	   smallest alignment of the physical address. */
-	tot = top;
-	for (bl = 128<<10; bl < max_size; bl <<= 1) {
-		if (bl * 2 > tot)
+	for (i = TASK_SIZE >> 28; i < 16; i++) {
+		/* Do not set NX on VM space for modules */
+		if (IS_ENABLED(CONFIG_MODULES) &&
+		    (VMALLOC_START & 0xf0000000) == i << 28)
 			break;
+		mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
 	}
+}
+
+void mmu_mark_rodata_ro(void)
+{
+	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+	int i;
+
+	if (cpu_has_feature(CPU_FTR_601))
+		return;
+
+	for (i = 0; i < nb; i++) {
+		struct ppc_bat *bat = BATS[i];
 
-	setbat(2, PAGE_OFFSET, 0, bl, PAGE_KERNEL_X);
-	done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1;
-	if ((done < tot) && !bat_addrs[3].limit) {
-		/* use BAT3 to cover a bit more */
-		tot -= done;
-		for (bl = 128<<10; bl < max_size; bl <<= 1)
-			if (bl * 2 > tot)
-				break;
-		setbat(3, PAGE_OFFSET+done, done, bl, PAGE_KERNEL_X);
-		done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1;
+		if (bat_addrs[i].start < (unsigned long)__init_begin)
+			bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX;
 	}
 
-	return done;
+	update_bats();
 }
 
 /*
  * Set up one of the I/D BAT (block address translation) register pairs.
  * The parameters are not checked; in particular size must be a power
  * of 2 between 128k and 256M.
+ * On 603+, only set IBAT when _PAGE_EXEC is set
  */
 void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 		   unsigned int size, pgprot_t prot)
@@ -138,11 +265,12 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 			bat[1].batu |= 1; 	/* Vp = 1 */
 		if (flags & _PAGE_GUARDED) {
 			/* G bit must be zero in IBATs */
-			bat[0].batu = bat[0].batl = 0;
-		} else {
-			/* make IBAT same as DBAT */
-			bat[0] = bat[1];
+			flags &= ~_PAGE_EXEC;
 		}
+		if (flags & _PAGE_EXEC)
+			bat[0] = bat[1];
+		else
+			bat[0].batu = bat[0].batl = 0;
 	} else {
 		/* 601 cpu */
 		if (bl > BL_8M)
@@ -211,8 +339,7 @@ void __init MMU_init_hw(void)
 	 * Find some memory for the hash table.
 	 */
 	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
-	Hash = __va(memblock_phys_alloc(Hash_size, Hash_size));
-	memset(Hash, 0, Hash_size);
+	Hash = memblock_alloc(Hash_size, Hash_size);
 	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
 
 	Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
@@ -231,7 +358,8 @@ void __init MMU_init_hw(void)
 	if (lg_n_hpteg > 16)
 		mb2 = 16 - LG_HPTEG_SIZE;
 
-	modify_instruction_site(&patch__hash_page_A0, 0xffff, (unsigned int)Hash >> 16);
+	modify_instruction_site(&patch__hash_page_A0, 0xffff,
+				((unsigned int)Hash - PAGE_OFFSET) >> 16);
 	modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6);
 	modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6);
 	modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
@@ -240,7 +368,8 @@ void __init MMU_init_hw(void)
 	/*
 	 * Patch up the instructions in hashtable.S:flush_hash_page
 	 */
-	modify_instruction_site(&patch__flush_hash_A0, 0xffff, (unsigned int)Hash >> 16);
+	modify_instruction_site(&patch__flush_hash_A0, 0xffff,
				((unsigned int)Hash - PAGE_OFFSET) >> 16);
 	modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6);
 	modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6);
 	modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
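block_size() in the ppc_mmu_32.c rewrite caps each BAT by three limits: the hardware maximum (256M on 603+), the alignment of the physical base, and the largest power of two still fitting below top. A standalone worked example (userspace C, with a local fls() since glibc lacks one): covering 192M of RAM from physical 0 takes a 128M BAT plus a 64M BAT.

#include <stdio.h>

static int fls_sketch(unsigned long x)		/* kernel-style fls() */
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

static unsigned int block_size_sketch(unsigned long base, unsigned long top)
{
	unsigned int max_size = 256 << 20;
	unsigned int base_shift = (fls_sketch(base) - 1) & 31;
	unsigned int block_shift = (fls_sketch(top - base) - 1) & 31;
	unsigned int size = max_size;

	if ((1U << base_shift) < size)	/* alignment of the base */
		size = 1U << base_shift;
	if ((1U << block_shift) < size)	/* power of two below the remainder */
		size = 1U << block_shift;
	return size;
}

int main(void)
{
	unsigned long base = 0, top = 192UL << 20;

	while (base != top) {
		unsigned int size = block_size_sketch(base, top);

		printf("BAT %luM..%luM (%uM)\n", base >> 20,
		       (base + size) >> 20, size >> 20);
		base += size;	/* prints 0..128M, then 128..192M */
	}
	return 0;
}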
diff --git a/arch/powerpc/mm/dump_linuxpagetables-8xx.c b/arch/powerpc/mm/ptdump/8xx.c
index ab9e3f24db2f..9e2d8e847d6e 100644
--- a/arch/powerpc/mm/dump_linuxpagetables-8xx.c
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -7,7 +7,7 @@
 #include <linux/kernel.h>
 #include <asm/pgtable.h>
 
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
 
 static const struct flag_info flag_array[] = {
 	{
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
new file mode 100644
index 000000000000..712762be3cb1
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y			+= ptdump.o
+
+obj-$(CONFIG_4xx)		+= shared.o
+obj-$(CONFIG_PPC_8xx)		+= 8xx.o
+obj-$(CONFIG_PPC_BOOK3E_MMU)	+= shared.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o bats.o segment_regs.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o hashpagetable.o
diff --git a/arch/powerpc/mm/dump_bats.c b/arch/powerpc/mm/ptdump/bats.c
index a0d23e96e841..a0d23e96e841 100644
--- a/arch/powerpc/mm/dump_bats.c
+++ b/arch/powerpc/mm/ptdump/bats.c
diff --git a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
index ed6fcf78256e..0dfca72cb9bd 100644
--- a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -7,7 +7,7 @@
 #include <linux/kernel.h>
 #include <asm/pgtable.h>
 
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
 
 static const struct flag_info flag_array[] = {
 	{
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index 869294695048..b430e4e08af6 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -342,7 +342,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
 
 	/* Look in secondary table */
 	if (slot == -1)
-		slot = base_hpte_find(ea, psize, true, &v, &r);
+		slot = base_hpte_find(ea, psize, false, &v, &r);
 
 	/* No entry found */
 	if (slot == -1)
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/ptdump/ptdump.c
index 6aa41669ac1a..37138428ab55 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -28,7 +28,7 @@
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
 
 #ifdef CONFIG_PPC32
 #define KERN_VIRT_START	0
@@ -143,14 +143,19 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
 	unsigned long delta;
 
 #ifdef CONFIG_PPC64
-	seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1);
-	seq_printf(st->seq, "0x%016lx ", st->start_pa);
+#define REG		"0x%016lx"
 #else
-	seq_printf(st->seq, "0x%08lx-0x%08lx ", st->start_address, addr - 1);
-	seq_printf(st->seq, "0x%08lx ", st->start_pa);
+#define REG		"0x%08lx"
 #endif
 
-	delta = (addr - st->start_address) >> 10;
+	seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
+	if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) {
+		seq_printf(st->seq, "[" REG "]", st->start_pa);
+		delta = PAGE_SIZE >> 10;
+	} else {
+		seq_printf(st->seq, " " REG " ", st->start_pa);
+		delta = (addr - st->start_address) >> 10;
+	}
 	/* Work out what appropriate unit to use */
 	while (!(delta & 1023) && unit[1]) {
 		delta >>= 10;
@@ -184,7 +189,8 @@ static void note_page(struct pg_state *st, unsigned long addr,
 	 */
 	} else if (flag != st->current_flags || level != st->level ||
 		   addr >= st->marker[1].start_address ||
-		   pa != st->last_pa + PAGE_SIZE) {
+		   (pa != st->last_pa + PAGE_SIZE &&
+		    (pa != st->start_pa || st->start_pa != st->last_pa))) {
 
 		/* Check the PTE flags */
 		if (st->current_flags) {
diff --git a/arch/powerpc/mm/dump_linuxpagetables.h b/arch/powerpc/mm/ptdump/ptdump.h
index 5d513636de73..5d513636de73 100644
--- a/arch/powerpc/mm/dump_linuxpagetables.h
+++ b/arch/powerpc/mm/ptdump/ptdump.h
diff --git a/arch/powerpc/mm/dump_sr.c b/arch/powerpc/mm/ptdump/segment_regs.c
index 501843664bb9..501843664bb9 100644
--- a/arch/powerpc/mm/dump_sr.c
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
diff --git a/arch/powerpc/mm/dump_linuxpagetables-generic.c b/arch/powerpc/mm/ptdump/shared.c
index 3fe98a0974c6..f7ed2f187cb0 100644
--- a/arch/powerpc/mm/dump_linuxpagetables-generic.c
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -7,7 +7,7 @@
 #include <linux/kernel.h>
 #include <asm/pgtable.h>
 
-#include "dump_linuxpagetables.h"
+#include "ptdump.h"
 
 static const struct flag_info flag_array[] = {
 	{
-#include "dump_linuxpagetables.h" +#include "ptdump.h" static const struct flag_info flag_array[] = { { diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index bc3914d54e26..5986df48359b 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -69,6 +69,11 @@ static void assert_slb_presence(bool present, unsigned long ea) if (!cpu_has_feature(CPU_FTR_ARCH_206)) return; + /* + * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware + * ignores all other bits from 0-27, so just clear them all. + */ + ea &= ~((1UL << 28) - 1); asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 06898c13901d..aec91dbcdc0b 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -32,6 +32,7 @@ #include <linux/export.h> #include <linux/hugetlb.h> #include <linux/sched/mm.h> +#include <linux/security.h> #include <asm/mman.h> #include <asm/mmu.h> #include <asm/copro.h> @@ -377,6 +378,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long addr, found, prev; struct vm_unmapped_area_info info; + unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr); info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; @@ -393,7 +395,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, if (high_limit > DEFAULT_MAP_WINDOW) addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW; - while (addr > PAGE_SIZE) { + while (addr > min_addr) { info.high_limit = addr; if (!slice_scan_available(addr - 1, available, 0, &addr)) continue; @@ -405,8 +407,8 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * Check if we need to reduce the range, or if we can * extend it to cover the previous available slice. */ - if (addr < PAGE_SIZE) - addr = PAGE_SIZE; + if (addr < min_addr) + addr = min_addr; else if (slice_scan_available(addr - 1, available, 0, &prev)) { addr = prev; goto prev_slice; @@ -528,7 +530,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, addr = _ALIGN_UP(addr, page_size); slice_dbg(" aligned addr=%lx\n", addr); /* Ignore hint if it's too large or overlaps a VMA */ - if (addr > high_limit - len || + if (addr > high_limit - len || addr < mmap_min_addr || !slice_area_is_free(mm, addr, len)) addr = 0; } diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index ae5d568e267f..ac23dc1c6535 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -302,7 +302,7 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, * This function as well as __local_flush_tlb_page() must only be called * for user contexts. */ - if (unlikely(WARN_ON(!mm))) + if (WARN_ON(!mm)) return; preempt_disable(); |