diff options
Diffstat (limited to 'arch/powerpc/mm')
25 files changed, 513 insertions, 189 deletions
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 6c5025e81236..f4c6472f2fc4 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -88,7 +88,7 @@ static void mmu_mapin_immr(void) int offset; for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_page(v + offset, p + offset, f); + map_kernel_page(v + offset, p + offset, f); } /* Address of instructions to patch */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 2dc74e5c6458..382528475433 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -227,7 +227,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t do { SetPageReserved(page); - map_page(vaddr, page_to_phys(page), + map_kernel_page(vaddr, page_to_phys(page), pgprot_val(pgprot_noncached(PAGE_KERNEL))); page++; vaddr += PAGE_SIZE; diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index c6b900f54c07..b1c144b03fcf 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -335,7 +335,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) unsigned long rpn, lp_bits; int base_psize = 0, actual_psize = 0; - if (ea <= PAGE_OFFSET) + if (ea < PAGE_OFFSET) return -1; /* Look in primary table */ diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index d659345a98d6..44fe4833910f 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -16,6 +16,7 @@ */ #include <linux/debugfs.h> #include <linux/fs.h> +#include <linux/hugetlb.h> #include <linux/io.h> #include <linux/mm.h> #include <linux/sched.h> @@ -391,7 +392,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { addr = start + i * PMD_SIZE; - if (!pmd_none(*pmd)) + if (!pmd_none(*pmd) && !pmd_huge(*pmd)) /* pmd exists */ walk_pte(st, pmd, addr); else @@ -407,7 +408,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) for (i = 0; i < PTRS_PER_PUD; i++, pud++) { addr = start + i * PUD_SIZE; - if (!pud_none(*pud)) + if (!pud_none(*pud) && !pud_huge(*pud)) /* pud exists */ walk_pmd(st, pud, addr); else @@ -427,7 +428,7 @@ static void walk_pagetables(struct pg_state *st) */ for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { addr = KERN_VIRT_START + i * PGDIR_SIZE; - if (!pgd_none(*pgd)) + if (!pgd_none(*pgd) && !pgd_huge(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); else diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 3a7d580fdc59..4c422632047b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -206,6 +206,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, int is_write = 0; int trap = TRAP(regs); int is_exec = trap == 0x400; + int is_user = user_mode(regs); int fault; int rc = 0, store_update_sp = 0; @@ -216,7 +217,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * bits we are interested in. But there are some bits which * indicate errors in DSISR but can validly be set in SRR1. */ - if (trap == 0x400) + if (is_exec) error_code &= 0x48200000; else is_write = error_code & DSISR_ISSTORE; @@ -247,13 +248,13 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * The kernel should never take an execute fault nor should it * take a page fault to a kernel address. */ - if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) { + if (!is_user && (is_exec || (address >= TASK_SIZE))) { rc = SIGSEGV; goto bail; } #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ - defined(CONFIG_PPC_BOOK3S_64)) + defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx)) if (error_code & DSISR_DABRMATCH) { /* breakpoint match */ do_break(regs, address, error_code); @@ -266,7 +267,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, local_irq_enable(); if (faulthandler_disabled() || mm == NULL) { - if (!user_mode(regs)) { + if (!is_user) { rc = SIGSEGV; goto bail; } @@ -287,10 +288,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * can result in fault, which will cause a deadlock when called with * mmap_sem held */ - if (!is_exec && user_mode(regs)) + if (is_write && is_user) store_update_sp = store_updates_sp(regs); - if (user_mode(regs)) + if (is_user) flags |= FAULT_FLAG_USER; /* When running in the kernel we expect faults to occur only to @@ -309,7 +310,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * thus avoiding the deadlock. */ if (!down_read_trylock(&mm->mmap_sem)) { - if (!user_mode(regs) && !search_exception_tables(regs->nip)) + if (!is_user && !search_exception_tables(regs->nip)) goto bad_area_nosemaphore; retry: @@ -509,7 +510,7 @@ bad_area: bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ - if (user_mode(regs)) { + if (is_user) { _exception(SIGSEGV, regs, code, address); goto bail; } diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 65bb8f33b399..3848af167df9 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/bitops.h> #include <linux/of.h> +#include <linux/processor.h> #include <linux/threads.h> #include <linux/smp.h> @@ -23,6 +24,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/trace.h> #include <asm/tlb.h> #include <asm/cputable.h> #include <asm/udbg.h> @@ -98,6 +100,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } + trace_tlbie(0, 0, va, 0, 0, 0, 0); } static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) @@ -147,6 +150,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } + trace_tlbie(0, 1, va, 0, 0, 0, 0); } @@ -181,8 +185,10 @@ static inline void native_lock_hpte(struct hash_pte *hptep) while (1) { if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word)) break; + spin_begin(); while(test_bit(HPTE_LOCK_BIT, word)) - cpu_relax(); + spin_cpu_relax(); + spin_end(); } } @@ -407,6 +413,38 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, tlbie(vpn, psize, psize, ssize, 0); } +/* + * Remove a bolted kernel entry. Memory hotplug uses this. + * + * No need to lock here because we should be the only user. + */ +static int native_hpte_removebolted(unsigned long ea, int psize, int ssize) +{ + unsigned long vpn; + unsigned long vsid; + long slot; + struct hash_pte *hptep; + + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + + slot = native_hpte_find(vpn, psize, ssize); + if (slot == -1) + return -ENOENT; + + hptep = htab_address + slot; + + VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED)); + + /* Invalidate the hpte */ + hptep->v = 0; + + /* Invalidate the TLB */ + tlbie(vpn, psize, psize, ssize, 0); + return 0; +} + + static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, int bpsize, int apsize, int ssize, int local) { @@ -725,6 +763,7 @@ void __init hpte_init_native(void) mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; mmu_hash_ops.hpte_updatepp = native_hpte_updatepp; mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp; + mmu_hash_ops.hpte_removebolted = native_hpte_removebolted; mmu_hash_ops.hpte_insert = native_hpte_insert; mmu_hash_ops.hpte_remove = native_hpte_remove; mmu_hash_ops.hpte_clear_all = native_hpte_clear; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f2095ce9d4b0..7a20669c19e7 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -810,6 +810,8 @@ static void update_hid_for_hash(void) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); + trace_tlbie(0, 0, rb, 0, 2, 0, 0); + /* * now switch the HID */ diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 6575b9aabef4..a12e86395025 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a4f33de4008e..e1bf5ca397fe 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -17,6 +17,8 @@ #include <linux/memblock.h> #include <linux/bootmem.h> #include <linux/moduleparam.h> +#include <linux/swap.h> +#include <linux/swapops.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> @@ -32,6 +34,7 @@ #define PAGE_SHIFT_16G 34 unsigned int HPAGE_SHIFT; +EXPORT_SYMBOL(HPAGE_SHIFT); /* * Tracks gpages after the device tree is scanned and before the @@ -55,7 +58,7 @@ static unsigned nr_gpages; #define hugepd_none(hpd) (hpd_val(hpd) == 0) -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) { /* Only called for hugetlbfs pages, hence can ignore THP */ return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL); @@ -77,7 +80,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, num_hugepd = 1; } - new = kmem_cache_zalloc(cachep, GFP_KERNEL); + new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -617,62 +620,39 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, } while (addr = next, addr != end); } -/* - * We are holding mmap_sem, so a parallel huge page collapse cannot run. - * To prevent hugepage split, disable irq. - */ -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +struct page *follow_huge_pd(struct vm_area_struct *vma, + unsigned long address, hugepd_t hpd, + int flags, int pdshift) { - bool is_thp; - pte_t *ptep, pte; - unsigned shift; - unsigned long mask, flags; - struct page *page = ERR_PTR(-EINVAL); - - local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift); - if (!ptep) - goto no_page; - pte = READ_ONCE(*ptep); - /* - * Verify it is a huge page else bail. - * Transparent hugepages are handled by generic code. We can skip them - * here. - */ - if (!shift || is_thp) - goto no_page; - - if (!pte_present(pte)) { - page = NULL; - goto no_page; + pte_t *ptep; + spinlock_t *ptl; + struct page *page = NULL; + unsigned long mask; + int shift = hugepd_shift(hpd); + struct mm_struct *mm = vma->vm_mm; + +retry: + ptl = &mm->page_table_lock; + spin_lock(ptl); + + ptep = hugepte_offset(hpd, address, pdshift); + if (pte_present(*ptep)) { + mask = (1UL << shift) - 1; + page = pte_page(*ptep); + page += ((address & mask) >> PAGE_SHIFT); + if (flags & FOLL_GET) + get_page(page); + } else { + if (is_hugetlb_entry_migration(*ptep)) { + spin_unlock(ptl); + __migration_entry_wait(mm, ptep, ptl); + goto retry; + } } - mask = (1UL << shift) - 1; - page = pte_page(pte); - if (page) - page += (address & mask) / PAGE_SIZE; - -no_page: - local_irq_restore(flags); + spin_unlock(ptl); return page; } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - BUG(); - return NULL; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - BUG(); - return NULL; -} - static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { @@ -763,8 +743,11 @@ static int __init add_huge_page_size(unsigned long long size) * Hash: 16M and 16G */ if (radix_enabled()) { - if (mmu_psize != MMU_PAGE_2M) - return -EINVAL; + if (mmu_psize != MMU_PAGE_2M) { + if (cpu_has_feature(CPU_FTR_POWER9_DD1) || + (mmu_psize != MMU_PAGE_1G)) + return -EINVAL; + } } else { if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) return -EINVAL; @@ -963,7 +946,7 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, if (pmd_none(pmd)) return NULL; - if (pmd_trans_huge(pmd)) { + if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { if (is_thp) *is_thp = true; ret_pte = (pte_t *) pmdp; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index ec84b31c6c86..5b4c25d12ff3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -44,6 +44,7 @@ #include <linux/slab.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <linux/memremap.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -110,8 +111,29 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size) return 0; } +/* + * vmemmap virtual address space management does not have a traditonal page + * table to track which virtual struct pages are backed by physical mapping. + * The virtual to physical mappings are tracked in a simple linked list + * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at + * all times where as the 'next' list maintains the available + * vmemmap_backing structures which have been deleted from the + * 'vmemmap_global' list during system runtime (memory hotplug remove + * operation). The freed 'vmemmap_backing' structures are reused later when + * new requests come in without allocating fresh memory. This pointer also + * tracks the allocated 'vmemmap_backing' structures as we allocate one + * full page memory at a time when we dont have any. + */ struct vmemmap_backing *vmemmap_list; static struct vmemmap_backing *next; + +/* + * The same pointer 'next' tracks individual chunks inside the allocated + * full page during the boot time and again tracks the freeed nodes during + * runtime. It is racy but it does not happen as they are separated by the + * boot process. Will create problem if some how we have memory hotplug + * operation during boot !! + */ static int num_left; static int num_freed; @@ -171,13 +193,17 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { + struct vmem_altmap *altmap; void *p; int rc; if (vmemmap_populated(start, page_size)) continue; - p = vmemmap_alloc_block(page_size, node); + /* altmap lookups only work at section boundaries */ + altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); + + p = __vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) return -ENOMEM; @@ -234,13 +260,17 @@ static unsigned long vmemmap_list_free(unsigned long start) void __ref vmemmap_free(unsigned long start, unsigned long end) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; + unsigned long page_order = get_order(page_size); start = _ALIGN_DOWN(start, page_size); pr_debug("vmemmap_free %lx...%lx\n", start, end); for (; start < end; start += page_size) { - unsigned long addr; + unsigned long nr_pages, addr; + struct vmem_altmap *altmap; + struct page *section_base; + struct page *page; /* * the section has already be marked as invalid, so @@ -251,29 +281,33 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) continue; addr = vmemmap_list_free(start); - if (addr) { - struct page *page = pfn_to_page(addr >> PAGE_SHIFT); - - if (PageReserved(page)) { - /* allocated from bootmem */ - if (page_size < PAGE_SIZE) { - /* - * this shouldn't happen, but if it is - * the case, leave the memory there - */ - WARN_ON_ONCE(1); - } else { - unsigned int nr_pages = - 1 << get_order(page_size); - while (nr_pages--) - free_reserved_page(page++); - } - } else - free_pages((unsigned long)(__va(addr)), - get_order(page_size)); - - vmemmap_remove_mapping(start, page_size); + if (!addr) + continue; + + page = pfn_to_page(addr >> PAGE_SHIFT); + section_base = pfn_to_page(vmemmap_section_start(start)); + nr_pages = 1 << page_order; + + altmap = to_vmem_altmap((unsigned long) section_base); + if (altmap) { + vmem_altmap_free(altmap, nr_pages); + } else if (PageReserved(page)) { + /* allocated from bootmem */ + if (page_size < PAGE_SIZE) { + /* + * this shouldn't happen, but if it is + * the case, leave the memory there + */ + WARN_ON_ONCE(1); + } else { + while (nr_pages--) + free_reserved_page(page++); + } + } else { + free_pages((unsigned long)(__va(addr)), page_order); } + + vmemmap_remove_mapping(start, page_size); } } #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9ee536ec0739..8541f18694a4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -36,6 +36,7 @@ #include <linux/hugetlb.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/memremap.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -126,18 +127,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, bool for_device) +int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) { - struct pglist_data *pgdata; - struct zone *zone; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int rc; resize_hpt_for_hotplug(memblock_phys_mem_size()); - pgdata = NODE_DATA(nid); - start = (unsigned long)__va(start); rc = create_section_mapping(start, start + size); if (rc) { @@ -147,11 +144,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device) return -EFAULT; } - /* this should work for most non-highmem platforms */ - zone = pgdata->node_zones + - zone_for_memory(nid, start, size, 0, for_device); - - return __add_pages(nid, zone, start_pfn, nr_pages); + return __add_pages(nid, start_pfn, nr_pages, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -159,11 +152,20 @@ int arch_remove_memory(u64 start, u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; + struct vmem_altmap *altmap; + struct page *page; int ret; - zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + /* + * If we have an altmap then we need to skip over any reserved PFNs + * when querying the zone. + */ + page = pfn_to_page(start_pfn); + altmap = to_vmem_altmap((unsigned long) page); + if (altmap) + page += vmem_altmap_offset(altmap); + + ret = __remove_pages(page_zone(page), start_pfn, nr_pages); if (ret) return ret; @@ -313,11 +315,11 @@ void __init paging_init(void) unsigned long end = __fix_to_virt(FIX_HOLE); for (; v < end; v += PAGE_SIZE) - map_page(v, 0, 0); /* XXX gross */ + map_kernel_page(v, 0, 0); /* XXX gross */ #endif #ifdef CONFIG_HIGHMEM - map_page(PKMAP_BASE, 0, 0); /* XXX gross */ + map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 9dbd2a733d6b..5d78b193fec4 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -34,16 +34,9 @@ /* * Top of mmap area (just below the process stack). * - * Leave at least a ~128 MB hole on 32bit applications. - * - * On 64bit applications we randomise the stack by 1GB so we need to - * space our mmap start address by a further 1GB, otherwise there is a - * chance the mmap area will end up closer to the stack than our ulimit - * requires. + * Leave at least a ~128 MB hole. */ -#define MIN_GAP32 (128*1024*1024) -#define MIN_GAP64 ((128 + 1024)*1024*1024UL) -#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64) +#define MIN_GAP (128*1024*1024) #define MAX_GAP (TASK_SIZE/6*5) static inline int mmap_is_legacy(void) @@ -71,9 +64,26 @@ unsigned long arch_mmap_rnd(void) return rnd << PAGE_SHIFT; } +static inline unsigned long stack_maxrandom_size(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + return (1<<23); + else + return (1<<30); +} + static inline unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long pad = stack_maxrandom_size() + stack_guard_gap; + + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; if (gap < MIN_GAP) gap = MIN_GAP; @@ -112,7 +122,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -157,7 +167,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); if (mm->task_size - len >= addr && addr >= mmap_min_addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index c6dca2ae78ef..abed1fe6992f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm) * mm->context.addr_limit. Default to max task size so that we copy the * default values to paca which will help us to handle slb miss early. */ - mm->context.addr_limit = TASK_SIZE_128TB; + mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64; /* * The old code would re-promote on fork, we don't do that when using @@ -138,6 +138,14 @@ static int radix__init_new_context(struct mm_struct *mm) rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); + /* + * Order the above store with subsequent update of the PID + * register (at which point HW can start loading/caching + * the entry) and the corresponding load by the MMU from + * the L2 cache. + */ + asm volatile("ptesync;isync" : : : "memory"); + mm->context.npu_context = NULL; return index; @@ -223,9 +231,15 @@ void destroy_context(struct mm_struct *mm) mm->context.cop_lockp = NULL; #endif /* CONFIG_PPC_ICSWX */ - if (radix_enabled()) - process_tb[mm->context.id].prtb1 = 0; - else + if (radix_enabled()) { + /* + * Radix doesn't have a valid bit in the process table + * entries. However we know that at least P9 implementation + * will avoid caching an entry with an invalid RTS field, + * and 0 is invalid. So this will do. + */ + process_tb[mm->context.id].prtb0 = 0; + } else subpage_prot_free(mm); destroy_pagetable_page(mm); __destroy_context(mm->context.id); @@ -235,10 +249,15 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_PPC_RADIX_MMU void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - asm volatile("isync": : :"memory"); - mtspr(SPRN_PID, next->context.id); - asm volatile("isync \n" - PPC_SLBIA(0x7) - : : :"memory"); + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + isync(); + mtspr(SPRN_PID, next->context.id); + isync(); + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); + } else { + mtspr(SPRN_PID, next->context.id); + isync(); + } } #endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index f988db655e5b..d46128b22150 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -94,7 +94,6 @@ extern void _tlbia(void); #ifdef CONFIG_PPC32 extern void mapin_ram(void); -extern int map_page(unsigned long va, phys_addr_t pa, int flags); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 371792e4418f..b95c584ce19d 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1311,8 +1311,10 @@ static int update_lookup_table(void *data) /* * Update the node maps and sysfs entries for each cpu whose home node * has changed. Returns 1 when the topology has changed, and 0 otherwise. + * + * cpus_locked says whether we already hold cpu_hotplug_lock. */ -int arch_update_cpu_topology(void) +int numa_update_cpu_topology(bool cpus_locked) { unsigned int cpu, sibling, changed = 0; struct topology_update_data *updates, *ud; @@ -1400,15 +1402,23 @@ int arch_update_cpu_topology(void) if (!cpumask_weight(&updated_cpus)) goto out; - stop_machine(update_cpu_topology, &updates[0], &updated_cpus); + if (cpus_locked) + stop_machine_cpuslocked(update_cpu_topology, &updates[0], + &updated_cpus); + else + stop_machine(update_cpu_topology, &updates[0], &updated_cpus); /* * Update the numa-cpu lookup table with the new mappings, even for * offline CPUs. It is best to perform this update from the stop- * machine context. */ - stop_machine(update_lookup_table, &updates[0], + if (cpus_locked) + stop_machine_cpuslocked(update_lookup_table, &updates[0], cpumask_of(raw_smp_processor_id())); + else + stop_machine(update_lookup_table, &updates[0], + cpumask_of(raw_smp_processor_id())); for (ud = &updates[0]; ud; ud = ud->next) { unregister_cpu_under_node(ud->cpu, ud->old_nid); @@ -1426,6 +1436,12 @@ out: return changed; } +int arch_update_cpu_topology(void) +{ + lockdep_assert_cpus_held(); + return numa_update_cpu_topology(true); +} + static void topology_work_fn(struct work_struct *work) { rebuild_sched_domains(); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 5fcb3dd74c13..31eed8fa8e99 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -32,7 +32,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, { int changed; #ifdef CONFIG_DEBUG_VM - WARN_ON(!pmd_trans_huge(*pmdp)); + WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); assert_spin_locked(&vma->vm_mm->page_table_lock); #endif changed = !pmd_same(*(pmdp), entry); @@ -59,7 +59,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, #ifdef CONFIG_DEBUG_VM WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); assert_spin_locked(&mm->page_table_lock); - WARN_ON(!pmd_trans_huge(pmd)); + WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd))); #endif trace_hugepage_set_pmd(addr, pmd_val(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index 8b85a14b08ea..188b4107584d 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -11,8 +11,12 @@ #include <linux/sched.h> #include <linux/mm_types.h> +#include <linux/mm.h> #include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/mmu.h> #include <asm/tlb.h> #include "mmu_decl.h" @@ -22,6 +26,81 @@ #ifdef CONFIG_SPARSEMEM_VMEMMAP /* + * vmemmap is the starting address of the virtual address space where + * struct pages are allocated for all possible PFNs present on the system + * including holes and bad memory (hence sparse). These virtual struct + * pages are stored in sequence in this virtual address space irrespective + * of the fact whether the corresponding PFN is valid or not. This achieves + * constant relationship between address of struct page and its PFN. + * + * During boot or memory hotplug operation when a new memory section is + * added, physical memory allocation (including hash table bolting) will + * be performed for the set of struct pages which are part of the memory + * section. This saves memory by not allocating struct pages for PFNs + * which are not valid. + * + * ---------------------------------------------- + * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES| + * ---------------------------------------------- + * + * f000000000000000 c000000000000000 + * vmemmap +--------------+ +--------------+ + * + | page struct | +--------------> | page struct | + * | +--------------+ +--------------+ + * | | page struct | +--------------> | page struct | + * | +--------------+ | +--------------+ + * | | page struct | + +------> | page struct | + * | +--------------+ | +--------------+ + * | | page struct | | +--> | page struct | + * | +--------------+ | | +--------------+ + * | | page struct | | | + * | +--------------+ | | + * | | page struct | | | + * | +--------------+ | | + * | | page struct | | | + * | +--------------+ | | + * | | page struct | | | + * | +--------------+ | | + * | | page struct | +-------+ | + * | +--------------+ | + * | | page struct | +-----------+ + * | +--------------+ + * | | page struct | No mapping + * | +--------------+ + * | | page struct | No mapping + * v +--------------+ + * + * ----------------------------------------- + * | RELATION BETWEEN STRUCT PAGES AND PFNS| + * ----------------------------------------- + * + * vmemmap +--------------+ +---------------+ + * + | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | | + * | +--------------+ + * | | | + * | +--------------+ + * | | | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | | + * | +--------------+ + * | | | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * v +--------------+ +---------------+ + */ +/* * On hash-based CPUs, the vmemmap is bolted in the hash table. * */ @@ -109,7 +188,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr unsigned long old; #ifdef CONFIG_DEBUG_VM - WARN_ON(!pmd_trans_huge(*pmdp)); + WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); assert_spin_locked(&mm->page_table_lock); #endif @@ -141,6 +220,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_trans_huge(*pmdp)); + VM_BUG_ON(pmd_devmap(*pmdp)); pmd = *pmdp; pmd_clear(pmdp); @@ -221,6 +301,7 @@ void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, { VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); + VM_BUG_ON(pmd_devmap(*pmdp)); /* * We can't mark the pmd none here, because that will cause a race @@ -342,3 +423,35 @@ int hash__has_transparent_hugepage(void) return 1; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#ifdef CONFIG_STRICT_KERNEL_RWX +void hash__mark_rodata_ro(void) +{ + unsigned long start = (unsigned long)_stext; + unsigned long end = (unsigned long)__init_begin; + unsigned long idx; + unsigned int step, shift; + unsigned long newpp = PP_RXXX; + + shift = mmu_psize_defs[mmu_linear_psize].shift; + step = 1 << shift; + + start = ((start + step - 1) >> shift) << shift; + end = (end >> shift) << shift; + + pr_devel("marking ro start %lx, end %lx, step %x\n", + start, end, step); + + if (start == end) { + pr_warn("could not set rodata ro, relocate the start" + " of the kernel to a 0x%x boundary\n", step); + return; + } + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); + +} +#endif diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c28165d8970b..8c13e4282308 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -11,6 +11,7 @@ #include <linux/sched/mm.h> #include <linux/memblock.h> #include <linux/of_fdt.h> +#include <linux/mm.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -19,6 +20,8 @@ #include <asm/mmu.h> #include <asm/firmware.h> #include <asm/powernv.h> +#include <asm/sections.h> +#include <asm/trace.h> #include <trace/events/thp.h> @@ -108,6 +111,49 @@ set_the_pte: return 0; } +#ifdef CONFIG_STRICT_KERNEL_RWX +void radix__mark_rodata_ro(void) +{ + unsigned long start = (unsigned long)_stext; + unsigned long end = (unsigned long)__init_begin; + unsigned long idx; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + start = ALIGN_DOWN(start, PAGE_SIZE); + end = PAGE_ALIGN(end); // aligns up + + pr_devel("marking ro start %lx, end %lx\n", start, end); + + for (idx = start; idx < end; idx += PAGE_SIZE) { + pgdp = pgd_offset_k(idx); + pudp = pud_alloc(&init_mm, pgdp, idx); + if (!pudp) + continue; + if (pud_huge(*pudp)) { + ptep = (pte_t *)pudp; + goto update_the_pte; + } + pmdp = pmd_alloc(&init_mm, pudp, idx); + if (!pmdp) + continue; + if (pmd_huge(*pmdp)) { + ptep = pmdp_ptep(pmdp); + goto update_the_pte; + } + ptep = pte_alloc_kernel(pmdp, idx); + if (!ptep) + continue; +update_the_pte: + radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0); + } + + radix__flush_tlb_kernel_range(start, end); +} +#endif /* CONFIG_STRICT_KERNEL_RWX */ + static inline void __meminit print_mapping(unsigned long start, unsigned long end, unsigned long size) @@ -121,7 +167,14 @@ static inline void __meminit print_mapping(unsigned long start, static int __meminit create_physical_mapping(unsigned long start, unsigned long end) { - unsigned long addr, mapping_size = 0; + unsigned long vaddr, addr, mapping_size = 0; + pgprot_t prot; + unsigned long max_mapping_size; +#ifdef CONFIG_STRICT_KERNEL_RWX + int split_text_mapping = 1; +#else + int split_text_mapping = 0; +#endif start = _ALIGN_UP(start, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { @@ -130,9 +183,12 @@ static int __meminit create_physical_mapping(unsigned long start, gap = end - addr; previous_size = mapping_size; + max_mapping_size = PUD_SIZE; +retry: if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && - mmu_psize_defs[MMU_PAGE_1G].shift) + mmu_psize_defs[MMU_PAGE_1G].shift && + PUD_SIZE <= max_mapping_size) mapping_size = PUD_SIZE; else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && mmu_psize_defs[MMU_PAGE_2M].shift) @@ -140,13 +196,32 @@ static int __meminit create_physical_mapping(unsigned long start, else mapping_size = PAGE_SIZE; + if (split_text_mapping && (mapping_size == PUD_SIZE) && + (addr <= __pa_symbol(__init_begin)) && + (addr + mapping_size) >= __pa_symbol(_stext)) { + max_mapping_size = PMD_SIZE; + goto retry; + } + + if (split_text_mapping && (mapping_size == PMD_SIZE) && + (addr <= __pa_symbol(__init_begin)) && + (addr + mapping_size) >= __pa_symbol(_stext)) + mapping_size = PAGE_SIZE; + if (mapping_size != previous_size) { print_mapping(start, addr, previous_size); start = addr; } - rc = radix__map_kernel_page((unsigned long)__va(addr), addr, - PAGE_KERNEL_X, mapping_size); + vaddr = (unsigned long)__va(addr); + + if (overlaps_kernel_text(vaddr, vaddr + mapping_size) || + overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) + prot = PAGE_KERNEL_X; + else + prot = PAGE_KERNEL; + + rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size); if (rc) return rc; } @@ -190,6 +265,7 @@ static void __init radix_init_pgtable(void) asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); } static void __init radix_init_partition_table(void) @@ -316,6 +392,9 @@ static void update_hid_for_radix(void) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); + trace_tlbie(0, 0, rb, 0, 2, 0, 1); + trace_tlbie(0, 0, rb, 0, 2, 1, 1); + /* * now switch the HID */ @@ -683,7 +762,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add unsigned long old; #ifdef CONFIG_DEBUG_VM - WARN_ON(!radix__pmd_trans_huge(*pmdp)); + WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); assert_spin_locked(&mm->page_table_lock); #endif @@ -701,6 +780,7 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(radix__pmd_trans_huge(*pmdp)); + VM_BUG_ON(pmd_devmap(*pmdp)); /* * khugepaged calls this for normal pmd */ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a65c0b4c0669..a9e4bfc025bc 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -60,7 +60,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT; ptepage = alloc_pages(flags, 0); if (!ptepage) @@ -189,7 +189,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, err = 0; for (i = 0; i < size && err == 0; i += PAGE_SIZE) - err = map_page(v+i, p+i, flags); + err = map_kernel_page(v+i, p+i, flags); if (err) { if (slab_is_available()) vunmap((void *)v); @@ -215,7 +215,7 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int map_page(unsigned long va, phys_addr_t pa, int flags) +int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) { pmd_t *pd; pte_t *pg; @@ -255,7 +255,7 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL); - map_page(v, p, f); + map_kernel_page(v, p, f); #ifdef CONFIG_PPC_STD_MMU_32 if (ktext) hash_preload(&init_mm, v, 0, 0x300); @@ -387,11 +387,6 @@ void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) return; } - map_page(address, phys, pgprot_val(flags)); + map_kernel_page(address, phys, pgprot_val(flags)); fixmaps++; } - -void __this_fixmap_does_not_exist(void) -{ - WARN_ON(1); -} diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index db93cf747a03..5c0b795d656c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -47,6 +47,7 @@ #include <asm/smp.h> #include <asm/machdep.h> #include <asm/tlb.h> +#include <asm/trace.h> #include <asm/processor.h> #include <asm/cputable.h> #include <asm/sections.h> @@ -323,7 +324,7 @@ struct page *pud_page(pud_t pud) */ struct page *pmd_page(pmd_t pmd) { - if (pmd_trans_huge(pmd) || pmd_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) return pte_page(pmd_pte(pmd)); return virt_to_page(pmd_page_vaddr(pmd)); } @@ -351,12 +352,20 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - if (!page) - return NULL; - if (!kernel && !pgtable_page_ctor(page)) { - __free_page(page); - return NULL; + struct page *page; + + if (!kernel) { + page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + } else { + page = alloc_page(PGALLOC_GFP); + if (!page) + return NULL; } ret = page_address(page); @@ -469,13 +478,31 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, * use of this partition ID was, not the new use. */ asm volatile("ptesync" : : : "memory"); - if (old & PATB_HR) + if (old & PATB_HR) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); - else + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); + } else { asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); + } asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); #endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_STRICT_KERNEL_RWX +void mark_rodata_ro(void) +{ + if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) { + pr_warn("Warning: Unable to mark rodata read only on this CPU.\n"); + return; + } + + if (radix_enabled()) + radix__mark_rodata_ro(); + else + hash__mark_rodata_ro(); +} +#endif diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 654a0d7ba0e7..13cfe413b40d 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -33,15 +33,7 @@ enum slb_index { KSTACK_INDEX = 2, /* Kernel stack map */ }; -extern void slb_allocate_realmode(unsigned long ea); - -static void slb_allocate(unsigned long ea) -{ - /* Currently, we do real mode for all SLBs including user, but - * that will change if we bring back dynamic VSIDs - */ - slb_allocate_realmode(ea); -} +extern void slb_allocate(unsigned long ea); #define slb_esid_mask(ssize) \ (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 1519617aab36..bde378559d01 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -65,14 +65,15 @@ MMU_FTR_SECTION_ELSE \ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) -/* void slb_allocate_realmode(unsigned long ea); +/* void slb_allocate(unsigned long ea); * * Create an SLB entry for the given EA (user or kernel). * r3 = faulting address, r13 = PACA * r9, r10, r11 are clobbered by this function + * r3 is preserved. * No other registers are examined or changed. */ -_GLOBAL(slb_allocate_realmode) +_GLOBAL(slb_allocate) /* * check for bad kernel/user address * (ea & ~REGION_MASK) >= PGTABLE_RANGE @@ -235,6 +236,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) * dont have any LRU information to help us choose a slot. */ + mr r9,r3 + + /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */ 7: ld r10,PACASTABRR(r13) addi r10,r10,1 /* This gets soft patched on boot. */ @@ -249,10 +253,10 @@ slb_compare_rr_to_size: std r10,PACASTABRR(r13) 3: - rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */ - oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */ + rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */ + oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */ - /* r3 = ESID data, r11 = VSID data */ + /* r9 = ESID data, r11 = VSID data */ /* * No need for an isync before or after this slbmte. The exception @@ -265,21 +269,21 @@ slb_compare_rr_to_size: bgelr cr7 /* Update the slb cache */ - lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ - cmpldi r3,SLB_CACHE_ENTRIES + lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r9,SLB_CACHE_ENTRIES bge 1f /* still room in the slb cache */ - sldi r11,r3,2 /* r11 = offset * sizeof(u32) */ + sldi r11,r9,2 /* r11 = offset * sizeof(u32) */ srdi r10,r10,28 /* get the 36 bits of the ESID */ add r11,r11,r13 /* r11 = (u32 *)paca + offset */ stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ - addi r3,r3,1 /* offset++ */ + addi r9,r9,1 /* offset++ */ b 2f 1: /* offset >= SLB_CACHE_ENTRIES */ - li r3,SLB_CACHE_ENTRIES+1 + li r9,SLB_CACHE_ENTRIES+1 2: - sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ crclr 4*cr0+eq /* set result to "success" */ blr @@ -301,11 +305,11 @@ slb_compare_rr_to_size: rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ /* r3 = EA, r11 = VSID data */ - clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */ + clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */ b 7b -_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode) +_ASM_NOKPROBE_SYMBOL(slb_allocate) _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) _ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 966b9fccfa66..45f6740dd407 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, if ((mm->task_size - len) < addr) return 0; vma = find_vma(mm, addr); - return (!vma || (addr + len) <= vma->vm_start); + return (!vma || (addr + len) <= vm_start_gap(vma)); } static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 02e71402fdd3..744e0164ecf5 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -16,6 +16,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> +#include <asm/trace.h> #define RIC_FLUSH_TLB 0 @@ -35,6 +36,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); } /* @@ -87,6 +89,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); } static inline void _tlbiel_va(unsigned long va, unsigned long pid, @@ -104,6 +107,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); } static inline void _tlbie_va(unsigned long va, unsigned long pid, @@ -121,6 +125,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); } /* @@ -377,6 +382,7 @@ void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); } EXPORT_SYMBOL(radix__flush_tlb_lpid_va); @@ -394,6 +400,7 @@ void radix__flush_tlb_lpid(unsigned long lpid) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); } EXPORT_SYMBOL(radix__flush_tlb_lpid); @@ -420,12 +427,14 @@ void radix__flush_tlb_all(void) */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); /* * now flush host entires by passing PRS = 0 and LPID == 0 */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, 0, ric, prs, r); } void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 4517aa43a8b1..b5b0fb97b9c0 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -93,12 +93,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, /* * Check if we have an active batch on this CPU. If not, just - * flush now and return. For now, we don global invalidates - * in that case, might be worth testing the mm cpu mask though - * and decide to use local invalidates instead... + * flush now and return. */ if (!batch->active) { - flush_hash_page(vpn, rpte, psize, ssize, 0); + flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm)); put_cpu_var(ppc64_tlb_batch); return; } |