From 736d2169338a50c8814efc186b5423aee43b0c68 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 22 Nov 2015 00:07:06 +0100 Subject: parisc: Add Huge Page and HUGETLBFS support This patch adds huge page support to allow userspace to allocate huge pages and to use the hugetlbfs filesystem on 32- and 64-bit Linux kernels. A later patch will add kernel support to map kernel text and data on huge pages. The only requirement is that the kernel is compiled for a PA8X00 CPU (PA2.0 architecture). Older PA1.X CPUs do not support variable page sizes. 64-bit kernels are compiled for PA2.0 by default. Technically, on parisc, multiple physical huge pages may be needed to emulate standard 2MB huge pages. Signed-off-by: Helge Deller --- arch/parisc/mm/Makefile | 1 + arch/parisc/mm/hugetlbpage.c | 161 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 arch/parisc/mm/hugetlbpage.c (limited to 'arch/parisc/mm') diff --git a/arch/parisc/mm/Makefile b/arch/parisc/mm/Makefile index 758ceefb373a..134393de69d2 100644 --- a/arch/parisc/mm/Makefile +++ b/arch/parisc/mm/Makefile @@ -3,3 +3,4 @@ # obj-y := init.o fault.o ioremap.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c new file mode 100644 index 000000000000..f6fdc77a72bd --- /dev/null +++ b/arch/parisc/mm/hugetlbpage.c @@ -0,0 +1,161 @@ +/* + * PARISC64 Huge TLB page support. + * + * This parisc implementation is heavily based on the SPARC and x86 code. 
+ * + * Copyright (C) 2015 Helge Deller + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + + if (addr) + addr = ALIGN(addr, huge_page_size(h)); + + /* we need to make sure the colouring is OK */ + return arch_get_unmapped_area(file, addr, len, pgoff, flags); +} + + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + /* We must align the address, because our caller will run + * set_huge_pte_at() on whatever we return, which writes out + * all of the sub-ptes for the hugepage range. So we have + * to give it the first such sub-pte. + */ + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + pmd = pmd_alloc(mm, pud, addr); + if (pmd) + pte = pte_alloc_map(mm, NULL, pmd, addr); + } + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + if (!pgd_none(*pgd)) { + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + pte = pte_offset_map(pmd, addr); + } + } + return pte; +} + +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. 
+ */ +static inline void purge_tlb_entries_huge(struct mm_struct *mm, unsigned long addr) +{ + int i; + + /* We may use multiple physical huge pages (e.g. 2x1 MB) to emulate + * Linux standard huge pages (e.g. 2 MB) */ + BUILD_BUG_ON(REAL_HPAGE_SHIFT > HPAGE_SHIFT); + + addr &= HPAGE_MASK; + addr |= _HUGE_PAGE_SIZE_ENCODING_DEFAULT; + + for (i = 0; i < (1 << (HPAGE_SHIFT-REAL_HPAGE_SHIFT)); i++) { + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); + addr += (1UL << REAL_HPAGE_SHIFT); + } +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + unsigned long addr_start; + int i; + + addr &= HPAGE_MASK; + addr_start = addr; + + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + /* Directly write pte entry. We could call set_pte_at(mm, addr, ptep, entry) + * instead, but then we get double locking on pa_tlb_lock. */ + *ptep = entry; + ptep++; + + /* Drop the PAGE_SIZE/non-huge tlb entry */ + purge_tlb_entries(mm, addr); + + addr += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + } + + purge_tlb_entries_huge(mm, addr_start); +} + + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_t entry; + + entry = *ptep; + set_huge_pte_at(mm, addr, ptep, __pte(0)); + + return entry; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +int pud_huge(pud_t pud) +{ + return 0; +} -- cgit v1.2.3 From 41b85a1163386f8328ad570f383973cb3975d2fa Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 22 Nov 2015 00:07:44 +0100 Subject: parisc: Map kernel text and data on huge pages Adjust the linker script and map_pages() to map kernel text and data on physical 1MB huge/large pages. 
Signed-off-by: Helge Deller --- arch/parisc/kernel/asm-offsets.c | 8 ++++++++ arch/parisc/kernel/vmlinux.lds.S | 9 ++++++--- arch/parisc/mm/init.c | 40 +++++++++++++++++----------------------- 3 files changed, 31 insertions(+), 26 deletions(-) (limited to 'arch/parisc/mm') diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 59001cea13f9..d2f62570a7b1 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -289,6 +289,14 @@ int main(void) DEFINE(ASM_PTE_ENTRY_SIZE, PTE_ENTRY_SIZE); DEFINE(ASM_PFN_PTE_SHIFT, PFN_PTE_SHIFT); DEFINE(ASM_PT_INITIAL, PT_INITIAL); + BLANK(); + /* HUGEPAGE_SIZE is only used in vmlinux.lds.S to align kernel text + * and kernel data on physical huge pages */ +#ifdef CONFIG_HUGETLB_PAGE + DEFINE(HUGEPAGE_SIZE, 1UL << REAL_HPAGE_SHIFT); +#else + DEFINE(HUGEPAGE_SIZE, PAGE_SIZE); +#endif BLANK(); DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip)); DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space)); diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 0dacc5ca555a..308f29081d46 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -60,7 +60,7 @@ SECTIONS EXIT_DATA } PERCPU_SECTION(8) - . = ALIGN(PAGE_SIZE); + . = ALIGN(HUGEPAGE_SIZE); __init_end = .; /* freed after init ends here */ @@ -116,7 +116,7 @@ SECTIONS * that we can properly leave these * as writable */ - . = ALIGN(PAGE_SIZE); + . = ALIGN(HUGEPAGE_SIZE); data_start = .; EXCEPTION_TABLE(8) @@ -135,8 +135,11 @@ SECTIONS _edata = .; /* BSS */ - BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 8) + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE) + + /* bootmap is allocated in setup_bootmem() directly behind bss. */ + . = ALIGN(HUGEPAGE_SIZE); _end = . 
; STABS_DEBUG diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index c229427fa546..ac90df1119bd 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -407,15 +407,11 @@ static void __init map_pages(unsigned long start_vaddr, unsigned long vaddr; unsigned long ro_start; unsigned long ro_end; - unsigned long fv_addr; - unsigned long gw_addr; - extern const unsigned long fault_vector_20; - extern void * const linux_gateway_page; + unsigned long kernel_end; ro_start = __pa((unsigned long)_text); ro_end = __pa((unsigned long)&data_start); - fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; - gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; + kernel_end = __pa((unsigned long)&_end); end_paddr = start_paddr + size; @@ -473,24 +469,25 @@ static void __init map_pages(unsigned long start_vaddr, for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) { pte_t pte; - /* - * Map the fault vector writable so we can - * write the HPMC checksum. - */ if (force) pte = __mk_pte(address, pgprot); - else if (parisc_text_address(vaddr) && - address != fv_addr) + else if (parisc_text_address(vaddr)) { pte = __mk_pte(address, PAGE_KERNEL_EXEC); + if (address >= ro_start && address < kernel_end) + pte = pte_mkhuge(pte); + } else #if defined(CONFIG_PARISC_PAGE_SIZE_4KB) - if (address >= ro_start && address < ro_end - && address != fv_addr - && address != gw_addr) - pte = __mk_pte(address, PAGE_KERNEL_RO); - else + if (address >= ro_start && address < ro_end) { + pte = __mk_pte(address, PAGE_KERNEL_EXEC); + pte = pte_mkhuge(pte); + } else #endif + { pte = __mk_pte(address, pgprot); + if (address >= ro_start && address < kernel_end) + pte = pte_mkhuge(pte); + } if (address >= end_paddr) { if (force) @@ -534,15 +531,12 @@ void free_initmem(void) /* force the kernel to see the new TLB entries */ __flush_tlb_range(0, init_begin, init_end); - /* Attempt to catch anyone trying to execute code here - * by filling the page with BRK insns. 
- */ - memset((void *)init_begin, 0x00, init_end - init_begin); + /* finally dump all the instructions which were cached, since the * pages are no-longer executable */ flush_icache_range(init_begin, init_end); - free_initmem_default(-1); + free_initmem_default(POISON_FREE_INITMEM); /* set up a new led state on systems shipped LED State panel */ pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE); @@ -712,8 +706,8 @@ static void __init pagetable_init(void) unsigned long size; start_paddr = pmem_ranges[range].start_pfn << PAGE_SHIFT; - end_paddr = start_paddr + (pmem_ranges[range].pages << PAGE_SHIFT); size = pmem_ranges[range].pages << PAGE_SHIFT; + end_paddr = start_paddr + size; map_pages((unsigned long)__va(start_paddr), start_paddr, size, PAGE_KERNEL, 0); -- cgit v1.2.3