From c920745e6964bd4b9315a17b018d83fad66010d3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Sep 2013 11:58:32 -0700 Subject: sparc64: Clean up 64-bit mmap exclusion defines. Older UltraSPARC chips had an address space hole due to the MMU only supporting 44-bit virtual addresses. The top end of this hole also has the same value as the current definition of PAGE_OFFSET, so this can be confusing. Consolidate the defines for the userspace mmap exclusion range into page_64.h and use them in sys_sparc_64.c and hugetlbpage.c Signed-off-by: David S. Miller Acked-by: Bob Picco --- arch/sparc/include/asm/page_64.h | 17 +++++++++++++++-- arch/sparc/kernel/sys_sparc_64.c | 3 --- arch/sparc/mm/hugetlbpage.c | 2 -- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e15538899f3d..d95931247feb 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -93,9 +93,22 @@ typedef unsigned long pgprot_t; typedef pte_t *pgtable_t; +/* These two values define the virtual address space range in which we + * must forbid 64-bit user processes from making mappings. It + * represents the virtual address space hole present in most early + * sparc64 chips including UltraSPARC-I. The next two defines specify + * the actual exclusion region we enforce, wherein we use a 4GB red + * zone on each side of the VA hole. + */ +#define SPARC64_VA_HOLE_TOP _AC(0xfffff80000000000,UL) +#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000080000000000,UL) + +#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL)) +#define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL)) + #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ - (_AC(0x0000000070000000,UL)) : \ - (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) + _AC(0x0000000070000000,UL) : \ + VA_EXCLUDE_END) #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 51561b8b15ba..d05eb9c1d846 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -39,9 +39,6 @@ asmlinkage unsigned long sys_getpagesize(void) return PAGE_SIZE; } -#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) -#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) - /* Does addr --> addr+len fall within 4GB of the VA-space hole or * overflow past the end of the 64-bit address space? */ diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 96399646570a..30963178d7e9 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -21,8 +21,6 @@ /* Slightly simplified from the non-hugepage variant because by * definition we don't have to worry about any page coloring stuff */ -#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) -#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long addr, -- cgit v1.2.3 From 922631b988d8cbb821ebe2c67feffc0b95264894 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Sep 2013 12:00:00 -0700 Subject: sparc64: Use PAGE_OFFSET instead of a magic constant. This pertains to all of the computations of the kernel fast TLB miss xor values. Based upon a patch by Bob Picco. Signed-off-by: David S. Miller Acked-by: Bob Picco --- arch/sparc/mm/init_64.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index ed82edad1a39..e295a5b30ac9 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1722,7 +1722,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1731,7 +1731,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1740,7 +1740,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -2261,10 +2261,10 @@ static void __init sun4u_pgprot_init(void) __ACCESS_BITS_4U | _PAGE_E_4U); #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | _PAGE_W_4U); @@ -2308,10 +2308,10 @@ static void __init sun4v_pgprot_init(void) _PAGE_CACHE = _PAGE_CACHE_4V; #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); -- cgit v1.2.3 From e0a45e3580a033669b24b04c3535515d69bb9702 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Sep 2013 14:22:34 -0700 Subject: sparc64: Define PAGE_OFFSET in terms of physical address bits. This makes clearer the implications for a given choosen value. Based upon patches by Bob Picco. Signed-off-by: David S. Miller Acked-by: Bob Picco --- arch/sparc/include/asm/page_64.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index d95931247feb..9dd0f7360822 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -117,7 +117,9 @@ typedef pte_t *pgtable_t; /* We used to stick this into a hard-coded global register (%g4) * but that does not make sense anymore. */ -#define PAGE_OFFSET _AC(0xFFFFF80000000000,UL) +#define MAX_SUPPORTED_PA_BITS 43 +#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X))) +#define PAGE_OFFSET PAGE_OFFSET_BY_BITS(MAX_SUPPORTED_PA_BITS) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From bb7b435388b9f035ecfb16f42b5c6bf428359c63 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Sep 2013 15:39:06 -0700 Subject: sparc64: Document the shift counts used to validate linear kernel addresses. This way we can see exactly what they are derived from, and in particular how they would change if we were to use a different PAGE_OFFSET value. Signed-off-by: David S. Miller Acked-by: Bob Picco --- arch/sparc/include/asm/page_64.h | 16 ++++++++++++++++ arch/sparc/kernel/ktlb.S | 10 +++++----- arch/sparc/mm/init_64.h | 4 +++- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 9dd0f7360822..978ea6d022e9 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -121,6 +121,22 @@ typedef pte_t *pgtable_t; #define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X))) #define PAGE_OFFSET PAGE_OFFSET_BY_BITS(MAX_SUPPORTED_PA_BITS) +/* The "virtual" portion of PAGE_OFFSET, used to clip off the non-physical + * bits of a linear kernel address. + */ +#define PAGE_OFFSET_VA_BITS (64 - MAX_SUPPORTED_PA_BITS) + +/* The actual number of physical memory address bits we support, this is + * used to size various tables used to manage kernel TLB misses. + */ +#define MAX_PHYS_ADDRESS_BITS 41 + +/* These two shift counts are used when indexing sparc64_valid_addr_bitmap + * and kpte_linear_bitmap. + */ +#define ILOG2_4MB 22 +#define ILOG2_256MB 28 + #ifndef __ASSEMBLY__ #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index fde5a419cf27..7ad46bc0c698 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -153,12 +153,12 @@ kvmap_dtlb_tsb4m_miss: /* Clear the PAGE_OFFSET top virtual bits, shift * down to get PFN, and make sure PFN is in range. */ - sllx %g4, 21, %g5 + sllx %g4, PAGE_OFFSET_VA_BITS, %g5 /* Check to see if we know about valid memory at the 4MB * chunk this physical address will reside within. */ - srlx %g5, 21 + 41, %g2 + srlx %g5, PAGE_OFFSET_VA_BITS + MAX_PHYS_ADDRESS_BITS, %g2 brnz,pn %g2, kvmap_dtlb_longpath nop @@ -176,7 +176,7 @@ valid_addr_bitmap_patch: or %g7, %lo(sparc64_valid_addr_bitmap), %g7 .previous - srlx %g5, 21 + 22, %g2 + srlx %g5, PAGE_OFFSET_VA_BITS + ILOG2_4MB, %g2 srlx %g2, 6, %g5 and %g2, 63, %g2 sllx %g5, 3, %g5 @@ -189,9 +189,9 @@ valid_addr_bitmap_patch: 2: sethi %hi(kpte_linear_bitmap), %g2 /* Get the 256MB physical address index. */ - sllx %g4, 21, %g5 + sllx %g4, PAGE_OFFSET_VA_BITS, %g5 or %g2, %lo(kpte_linear_bitmap), %g2 - srlx %g5, 21 + 28, %g5 + srlx %g5, PAGE_OFFSET_VA_BITS + ILOG2_256MB, %g5 and %g5, (32 - 1), %g7 /* Divide by 32 to get the offset into the bitmask. */ diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 0661aa606dec..5d3782deb403 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -1,11 +1,13 @@ #ifndef _SPARC64_MM_INIT_H #define _SPARC64_MM_INIT_H +#include + /* Most of the symbols in this file are defined in init.c and * marked non-static so that assembler code can get at them. */ -#define MAX_PHYS_ADDRESS (1UL << 41UL) +#define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) #define KPTE_BITMAP_BYTES \ ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) -- cgit v1.2.3 From f998c9c0d663b013e3aa3ba78908396c8c497218 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Sep 2013 18:39:25 -0700 Subject: sparc64: Fix inconsistent max-physical-address defines. Some parts of the code use '41' others use '42', make them all use the same value. Signed-off-by: David S. Miller Acked-by: Bob Picco --- arch/sparc/include/asm/sparsemem.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h index b99d4e4b6d28..e5e1752d5d78 100644 --- a/arch/sparc/include/asm/sparsemem.h +++ b/arch/sparc/include/asm/sparsemem.h @@ -3,9 +3,11 @@ #ifdef __KERNEL__ +#include + #define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS 42 -#define MAX_PHYSMEM_BITS 42 +#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS +#define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS #endif /* !(__KERNEL__) */ -- cgit v1.2.3 From b2d438348024b75a1ee8b66b85d77f569a5dfed8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 20 Sep 2013 21:50:41 -0700 Subject: sparc64: Make PAGE_OFFSET variable. Choose PAGE_OFFSET dynamically based upon cpu type. Original UltraSPARC-I (spitfire) chips only supported a 44-bit virtual address space. Newer chips (T4 and later) support 52-bit virtual addresses and up to 47-bits of physical memory space. Therefore we have to adjust PAGE_SIZE dynamically based upon the capabilities of the chip. Note that this change alone does not allow us to support > 43-bit physical memory, to do that we need to re-arrange our page table support. The current encodings of the pmd_t and pgd_t pointers restricts us to "32 + 11" == 43 bits. This change can waste quite a bit of memory for the various tables. In particular, a future change should work to size and allocate kern_linear_bitmap[] and sparc64_valid_addr_bitmap[] dynamically. This isn't easy as we really cannot take a TLB miss when accessing kern_linear_bitmap[]. We'd have to lock it into the TLB or similar. Signed-off-by: David S. Miller Acked-by: Bob Picco --- arch/sparc/include/asm/page_64.h | 20 +++------ arch/sparc/kernel/ktlb.S | 30 ++++++++++--- arch/sparc/kernel/vmlinux.lds.S | 5 +++ arch/sparc/lib/clear_page.S | 4 +- arch/sparc/lib/copy_page.S | 4 +- arch/sparc/mm/init_64.c | 92 ++++++++++++++++++++++++++++++++++++++++ arch/sparc/mm/ultra.S | 12 +++--- 7 files changed, 138 insertions(+), 29 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 978ea6d022e9..89e07fd0ac88 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -112,24 +112,16 @@ typedef pte_t *pgtable_t; #include -#endif /* !(__ASSEMBLY__) */ - -/* We used to stick this into a hard-coded global register (%g4) - * but that does not make sense anymore. - */ -#define MAX_SUPPORTED_PA_BITS 43 #define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X))) -#define PAGE_OFFSET PAGE_OFFSET_BY_BITS(MAX_SUPPORTED_PA_BITS) +extern unsigned long PAGE_OFFSET; -/* The "virtual" portion of PAGE_OFFSET, used to clip off the non-physical - * bits of a linear kernel address. - */ -#define PAGE_OFFSET_VA_BITS (64 - MAX_SUPPORTED_PA_BITS) +#endif /* !(__ASSEMBLY__) */ -/* The actual number of physical memory address bits we support, this is - * used to size various tables used to manage kernel TLB misses. +/* The maximum number of physical memory address bits we support, this + * is used to size various tables used to manage kernel TLB misses and + * also the sparsemem code. */ -#define MAX_PHYS_ADDRESS_BITS 41 +#define MAX_PHYS_ADDRESS_BITS 47 /* These two shift counts are used when indexing sparc64_valid_addr_bitmap * and kpte_linear_bitmap. diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 7ad46bc0c698..542e96ac4d39 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -153,12 +153,19 @@ kvmap_dtlb_tsb4m_miss: /* Clear the PAGE_OFFSET top virtual bits, shift * down to get PFN, and make sure PFN is in range. */ - sllx %g4, PAGE_OFFSET_VA_BITS, %g5 +661: sllx %g4, 0, %g5 + .section .page_offset_shift_patch, "ax" + .word 661b + .previous /* Check to see if we know about valid memory at the 4MB * chunk this physical address will reside within. */ - srlx %g5, PAGE_OFFSET_VA_BITS + MAX_PHYS_ADDRESS_BITS, %g2 +661: srlx %g5, MAX_PHYS_ADDRESS_BITS, %g2 + .section .page_offset_shift_patch, "ax" + .word 661b + .previous + brnz,pn %g2, kvmap_dtlb_longpath nop @@ -176,7 +183,11 @@ valid_addr_bitmap_patch: or %g7, %lo(sparc64_valid_addr_bitmap), %g7 .previous - srlx %g5, PAGE_OFFSET_VA_BITS + ILOG2_4MB, %g2 +661: srlx %g5, ILOG2_4MB, %g2 + .section .page_offset_shift_patch, "ax" + .word 661b + .previous + srlx %g2, 6, %g5 and %g2, 63, %g2 sllx %g5, 3, %g5 @@ -189,9 +200,18 @@ valid_addr_bitmap_patch: 2: sethi %hi(kpte_linear_bitmap), %g2 /* Get the 256MB physical address index. */ - sllx %g4, PAGE_OFFSET_VA_BITS, %g5 +661: sllx %g4, 0, %g5 + .section .page_offset_shift_patch, "ax" + .word 661b + .previous + or %g2, %lo(kpte_linear_bitmap), %g2 - srlx %g5, PAGE_OFFSET_VA_BITS + ILOG2_256MB, %g5 + +661: srlx %g5, ILOG2_256MB, %g5 + .section .page_offset_shift_patch, "ax" + .word 661b + .previous + and %g5, (32 - 1), %g7 /* Divide by 32 to get the offset into the bitmask. */ diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 0bacceb19150..932ff90fd760 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -122,6 +122,11 @@ SECTIONS *(.swapper_4m_tsb_phys_patch) __swapper_4m_tsb_phys_patch_end = .; } + .page_offset_shift_patch : { + __page_offset_shift_patch = .; + *(.page_offset_shift_patch) + __page_offset_shift_patch_end = .; + } .popc_3insn_patch : { __popc_3insn_patch = .; *(.popc_3insn_patch) diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S index 77e531f6c2a7..46272dfc26e8 100644 --- a/arch/sparc/lib/clear_page.S +++ b/arch/sparc/lib/clear_page.S @@ -37,10 +37,10 @@ _clear_page: /* %o0=dest */ .globl clear_user_page clear_user_page: /* %o0=dest, %o1=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o2 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o4 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S index 4d2df328e514..dd16c61f3263 100644 --- a/arch/sparc/lib/copy_page.S +++ b/arch/sparc/lib/copy_page.S @@ -46,10 +46,10 @@ .type copy_user_page,#function copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o4 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o3 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index e295a5b30ac9..7a97b5a28b4b 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1557,6 +1557,96 @@ unsigned long __init find_ecache_flush_span(unsigned long size) return ~0UL; } +unsigned long PAGE_OFFSET; +EXPORT_SYMBOL(PAGE_OFFSET); + +static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits) +{ + unsigned long final_shift; + unsigned int val = *insn; + unsigned int cnt; + + /* We are patching in ilog2(max_supported_phys_address), and + * we are doing so in a manner similar to a relocation addend. + * That is, we are adding the shift value to whatever value + * is in the shift instruction count field already. + */ + cnt = (val & 0x3f); + val &= ~0x3f; + + /* If we are trying to shift >= 64 bits, clear the destination + * register. This can happen when phys_bits ends up being equal + * to MAX_PHYS_ADDRESS_BITS. + */ + final_shift = (cnt + (64 - phys_bits)); + if (final_shift >= 64) { + unsigned int rd = (val >> 25) & 0x1f; + + val = 0x80100000 | (rd << 25); + } else { + val |= final_shift; + } + *insn = val; + + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (insn)); +} + +static void __init page_offset_shift_patch(unsigned long phys_bits) +{ + extern unsigned int __page_offset_shift_patch; + extern unsigned int __page_offset_shift_patch_end; + unsigned int *p; + + p = &__page_offset_shift_patch; + while (p < &__page_offset_shift_patch_end) { + unsigned int *insn = (unsigned int *)(unsigned long)*p; + + page_offset_shift_patch_one(insn, phys_bits); + + p++; + } +} + +static void __init setup_page_offset(void) +{ + unsigned long max_phys_bits = 40; + + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + max_phys_bits = 42; + } else if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: + max_phys_bits = 39; + break; + case SUN4V_CHIP_NIAGARA3: + max_phys_bits = 43; + break; + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC64X: + default: + max_phys_bits = 47; + break; + } + } + + if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) { + prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n", + max_phys_bits); + prom_halt(); + } + + PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits); + + pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + PAGE_OFFSET, max_phys_bits); + + page_offset_shift_patch(max_phys_bits); +} + static void __init tsb_phys_patch(void) { struct tsb_ldquad_phys_patch_entry *pquad; @@ -1763,6 +1853,8 @@ void __init paging_init(void) unsigned long real_end, i; int node; + setup_page_offset(); + /* These build time checkes make sure that the dcache_dirty_cpu() * page->flags usage will work. * diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 432aa0cb1b38..b4f4733abc6e 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -153,10 +153,10 @@ __spitfire_flush_tlb_mm_slow: .globl __flush_icache_page __flush_icache_page: /* %o0 = phys_page */ srlx %o0, PAGE_SHIFT, %o0 - sethi %uhi(PAGE_OFFSET), %g1 + sethi %hi(PAGE_OFFSET), %g1 sllx %o0, PAGE_SHIFT, %o0 sethi %hi(PAGE_SIZE), %g2 - sllx %g1, 32, %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 add %o0, %g1, %o0 1: subcc %g2, 32, %g2 bne,pt %icc, 1b @@ -178,8 +178,8 @@ __flush_icache_page: /* %o0 = phys_page */ .align 64 .globl __flush_dcache_page __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 ! physical address srlx %o0, 11, %o0 ! make D-cache TAG sethi %hi(1 << 14), %o2 ! D-cache size @@ -287,8 +287,8 @@ __cheetah_flush_tlb_pending: /* 27 insns */ #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 sethi %hi(PAGE_SIZE), %o4 1: subcc %o4, (1 << 5), %o4 -- cgit v1.2.3 From 37b3a8ff3e086cd5c369e77d2383b691b2874cd6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 Sep 2013 13:48:49 -0700 Subject: sparc64: Move from 4MB to 8MB huge pages. The impetus for this is that we would like to move to 64-bit PMDs and PGDs, but that would result in only supporting a 42-bit address space with the current page table layout. It'd be nice to support at least 43-bits. The reason we'd end up with only 42-bits after making PMDs and PGDs 64-bit is that we only use half-page sized PTE tables in order to make PMDs line up to 4MB, the hardware huge page size we use. So what we do here is we make huge pages 8MB, and fabricate them using 4MB hw TLB entries. Facilitate this by providing a "REAL_HPAGE_SHIFT" which is used in places that really need to operate on hardware 4MB pages. Use full pages (512 entries) for PTE tables, and adjust PMD_SHIFT, PGD_SHIFT, and the build time CPP test as needed. Use a CPP test to make sure REAL_HPAGE_SHIFT and the _PAGE_SZHUGE_* we use match up. This makes the pgtable cache completely unused, so remove the code managing it and the state used in mm_context_t. Now we have less spinlocks taken in the page table allocation path. The technique we use to fabricate the 8MB pages is to transfer bit 22 from the missing virtual address into the PTEs physical address field. That takes care of the transparent huge pages case. For hugetlb, we fill things in at the PTE level and that code already puts the sub huge page physical bits into the PTEs, based upon the offset, so there is nothing special we need to do. It all just works out. So, a small amount of complexity in the THP case, but this code is about to get much simpler when we move the 64-bit PMDs as we can move away from the fancy 32-bit huge PMD encoding and just put a real PTE value in there. With bug fixes and help from Bob Picco. Signed-off-by: David S. Miller --- arch/sparc/include/asm/mmu_64.h | 1 - arch/sparc/include/asm/page_64.h | 5 ++- arch/sparc/include/asm/pgtable_64.h | 12 ++++-- arch/sparc/include/asm/tsb.h | 17 ++++++--- arch/sparc/kernel/sun4v_tlb_miss.S | 2 +- arch/sparc/kernel/tsb.S | 2 +- arch/sparc/mm/init_64.c | 74 ++++++++----------------------------- arch/sparc/mm/tlb.c | 6 ++- arch/sparc/mm/tsb.c | 13 +------ 9 files changed, 47 insertions(+), 85 deletions(-) diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index 76092c4dd277..f668797ae234 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -93,7 +93,6 @@ typedef struct { spinlock_t lock; unsigned long sparc64_ctx_val; unsigned long huge_pte_count; - struct page *pgtable_page; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; } mm_context_t; diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 89e07fd0ac88..1958bfbe300c 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -15,7 +15,10 @@ #define DCACHE_ALIASING_POSSIBLE #endif -#define HPAGE_SHIFT 22 +#define HPAGE_SHIFT 23 +#define REAL_HPAGE_SHIFT 22 + +#define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 36760317814f..012b6eeeb06a 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -48,18 +48,18 @@ /* PMD_SHIFT determines the size of the area a second-level page * table can map */ -#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4)) +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) #define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PMD_BITS (PAGE_SHIFT - 2) /* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS) +#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PGDIR_BITS (PAGE_SHIFT - 2) -#if (PGDIR_SHIFT + PGDIR_BITS) != 44 +#if (PGDIR_SHIFT + PGDIR_BITS) != 45 #error Page table parameters do not cover virtual address space properly. #endif @@ -95,7 +95,7 @@ #include /* Entries per page directory level. */ -#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4)) +#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PMD (1UL << PMD_BITS) #define PTRS_PER_PGD (1UL << PGDIR_BITS) @@ -180,6 +180,10 @@ #define _PAGE_SZBITS_4U _PAGE_SZ8K_4U #define _PAGE_SZBITS_4V _PAGE_SZ8K_4V +#if REAL_HPAGE_SHIFT != 22 +#error REAL_HPAGE_SHIFT and _PAGE_SZHUGE_foo must match up +#endif + #define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U #define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index e696432b950d..16e577711a7b 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -152,7 +152,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ sllx REG1, PMD_PADDR_SHIFT, REG1; \ andn REG2, 0x7, REG2; \ add REG1, REG2, REG1; @@ -177,8 +177,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; or REG, _PAGE_##NAME##_4V, REG; \ .previous; - /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */ -#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \ + /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. + * + * We are fabricating an 8MB page using 2 4MB HW pages here. + */ +#define BUILD_PTE_VALID_SZHUGE_CACHE(VADDR, PADDR_BITS, REG) \ + sethi %hi(4 * 1024 * 1024), REG; \ + andn PADDR_BITS, REG, PADDR_BITS; \ + and VADDR, REG, REG; \ + or PADDR_BITS, REG, PADDR_BITS; \ 661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \ .section .sun4v_1insn_patch, "ax"; \ .word 661b; \ @@ -231,7 +238,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; nop; \ OR_PTE_BIT_2INSN(REG2, REG1, EXEC); \ /* REG1 can now be clobbered, build final PTE */ \ -1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \ +1: BUILD_PTE_VALID_SZHUGE_CACHE(VADDR, REG2, REG1); \ ba,pt %xcc, PTE_LABEL; \ or REG1, REG2, REG1; \ 700: @@ -263,7 +270,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ sllx REG1, PMD_PADDR_SHIFT, REG1; \ andn REG2, 0x7, REG2; \ add REG1, REG2, REG1; \ diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index bde867fd71e8..e0c09bf85610 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -182,7 +182,7 @@ sun4v_tsb_miss_common: cmp %g5, -1 be,pt %xcc, 80f nop - COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7) + COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7) /* That clobbered %g2, reload it. */ ldxa [%g0] ASI_SCRATCHPAD, %g2 diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index a313e4a9399b..14158d40ba76 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -75,7 +75,7 @@ tsb_miss_page_table_walk: mov 512, %g7 andn %g5, 0x7, %g5 sllx %g7, %g6, %g7 - srlx %g4, HPAGE_SHIFT, %g6 + srlx %g4, REAL_HPAGE_SHIFT, %g6 sub %g7, 1, %g7 and %g6, %g7, %g6 sllx %g6, 4, %g6 diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 7a97b5a28b4b..807e10833512 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -354,7 +354,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, address, pte_val(pte)); else #endif @@ -2547,53 +2547,13 @@ void __flush_tlb_all(void) : : "r" (pstate)); } -static pte_t *get_from_cache(struct mm_struct *mm) -{ - struct page *page; - pte_t *ret; - - spin_lock(&mm->page_table_lock); - page = mm->context.pgtable_page; - ret = NULL; - if (page) { - void *p = page_address(page); - - mm->context.pgtable_page = NULL; - - ret = (pte_t *) (p + (PAGE_SIZE / 2)); - } - spin_unlock(&mm->page_table_lock); - - return ret; -} - -static struct page *__alloc_for_cache(struct mm_struct *mm) -{ - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); - - if (page) { - spin_lock(&mm->page_table_lock); - if (!mm->context.pgtable_page) { - atomic_set(&page->_count, 2); - mm->context.pgtable_page = page; - } - spin_unlock(&mm->page_table_lock); - } - return page; -} - pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page; - pte_t *pte; - - pte = get_from_cache(mm); - if (pte) - return pte; + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; - page = __alloc_for_cache(mm); if (page) pte = (pte_t *) page_address(page); @@ -2603,14 +2563,10 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page; - pte_t *pte; - - pte = get_from_cache(mm); - if (pte) - return pte; + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; - page = __alloc_for_cache(mm); if (page) { pgtable_page_ctor(page); pte = (pte_t *) page_address(page); @@ -2621,18 +2577,15 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - struct page *page = virt_to_page(pte); - if (put_page_testzero(page)) - free_hot_cold_page(page, 0); + free_page((unsigned long)pte); } static void __pte_free(pgtable_t pte) { struct page *page = virt_to_page(pte); - if (put_page_testzero(page)) { - pgtable_page_dtor(page); - free_hot_cold_page(page, 0); - } + + pgtable_page_dtor(page); + __free_page(page); } void pte_free(struct mm_struct *mm, pgtable_t pte) @@ -2752,6 +2705,9 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pte <<= PMD_PADDR_SHIFT; pte |= _PAGE_VALID; + /* We are fabricating 8MB pages using 4MB real hw pages. */ + pte |= (addr & (1UL << REAL_HPAGE_SHIFT)); + prot = pmd_pgprot(entry); if (tlb_type == hypervisor) @@ -2766,7 +2722,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, spin_lock_irqsave(&mm->context.lock, flags); if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, addr, pte); spin_unlock_irqrestore(&mm->context.lock, flags); diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 7a91f288c708..97d1e56e9863 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -181,10 +181,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0); addr &= HPAGE_MASK; - if (pmd_val(orig) & PMD_ISHUGE) + if (pmd_val(orig) & PMD_ISHUGE) { tlb_batch_add_one(mm, addr, exec); - else + tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); + } else { tlb_batch_pmd_scan(mm, addr, orig, exec); + } } } diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 2cc3bce5ee91..3b3a360b429a 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -87,7 +87,7 @@ void flush_tsb_user(struct tlb_batch *tb) nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries); + __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries); } #endif spin_unlock_irqrestore(&mm->context.lock, flags); @@ -111,7 +111,7 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries); + __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries); } #endif spin_unlock_irqrestore(&mm->context.lock, flags); @@ -472,8 +472,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.huge_pte_count = 0; #endif - mm->context.pgtable_page = NULL; - /* copy_mm() copies over the parent's mm_struct before calling * us, so we need to zero out the TSB pointer or else tsb_grow() * will be confused and think there is an older TSB to free up. @@ -512,17 +510,10 @@ static void tsb_destroy_one(struct tsb_config *tp) void destroy_context(struct mm_struct *mm) { unsigned long flags, i; - struct page *page; for (i = 0; i < MM_NUM_TSBS; i++) tsb_destroy_one(&mm->context.tsb_block[i]); - page = mm->context.pgtable_page; - if (page && put_page_testzero(page)) { - pgtable_page_dtor(page); - free_hot_cold_page(page, 0); - } - spin_lock_irqsave(&ctx_alloc_lock, flags); if (CTX_VALID(mm->context)) { -- cgit v1.2.3 From 2b77933c28f5044629bb19e8045aae65b72b939d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 25 Sep 2013 14:33:16 -0700 Subject: sparc64: Move to 64-bit PGDs and PMDs. To make the page tables compact, we were using 32-bit PGDs and PMDs. We only had to support <= 43 bits of physical addresses so this was quite feasible. In order to support larger physical addresses we have to move to 64-bit PGDs and PMDs. Most of the changes are straight-forward: 1) {pgd,pmd}_t --> unsigned long 2) Anything that tries to use plain "unsigned int" types with pgd/pmd values needs to be adjusted. In particular things like "0U" become "0UL". 3) {PGDIR,PMD}_BITS decrease by one. 4) In the assembler page table walkers, use "ldxa" instead of "lduwa" and adjust the low bit masks to clear out the low 3 bits instead of just the low 2 bits during pgd/pmd address formation. Also, use PTRS_PER_PGD and PTRS_PER_PMD in the sizing of the swapper_{pg_dir,low_pmd_dir} arrays. This patch does not try to take advantage of having 64-bits in the PMDs to simplify the hugepage code, that will come in a subsequent change. Signed-off-by: David S. Miller --- arch/sparc/include/asm/page_64.h | 25 ++++++++++++++----------- arch/sparc/include/asm/pgtable_64.h | 22 +++++++++++----------- arch/sparc/include/asm/tsb.h | 16 ++++++++-------- arch/sparc/mm/init_64.c | 2 +- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 1958bfbe300c..aac53fcea807 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -56,8 +56,8 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag /* These are used to make use of C type-checking.. */ typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long iopte; } iopte_t; -typedef struct { unsigned int pmd; } pmd_t; -typedef struct { unsigned int pgd; } pgd_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) @@ -76,8 +76,8 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* .. while these make it easier on the compiler */ typedef unsigned long pte_t; typedef unsigned long iopte_t; -typedef unsigned int pmd_t; -typedef unsigned int pgd_t; +typedef unsigned long pmd_t; +typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) @@ -97,15 +97,18 @@ typedef unsigned long pgprot_t; typedef pte_t *pgtable_t; /* These two values define the virtual address space range in which we - * must forbid 64-bit user processes from making mappings. It - * represents the virtual address space hole present in most early - * sparc64 chips including UltraSPARC-I. The next two defines specify - * the actual exclusion region we enforce, wherein we use a 4GB red - * zone on each side of the VA hole. + * must forbid 64-bit user processes from making mappings. It used to + * represent precisely the virtual address space hole present in most + * early sparc64 chips including UltraSPARC-I. But now it also is + * further constrained by the limits of our page tables, which is + * 43-bits of virtual address. */ -#define SPARC64_VA_HOLE_TOP _AC(0xfffff80000000000,UL) -#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000080000000000,UL) +#define SPARC64_VA_HOLE_TOP _AC(0xfffffc0000000000,UL) +#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL) +/* The next two defines specify the actual exclusion region we + * enforce, wherein we use a 4GB red zone on each side of the VA hole. + */ #define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL)) #define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL)) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 012b6eeeb06a..eee803e10b92 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -51,15 +51,15 @@ #define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) #define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PMD_BITS (PAGE_SHIFT - 2) +#define PMD_BITS (PAGE_SHIFT - 3) /* PGDIR_SHIFT determines what a third-level page table entry can map */ #define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS) #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PGDIR_BITS (PAGE_SHIFT - 2) +#define PGDIR_BITS (PAGE_SHIFT - 3) -#if (PGDIR_SHIFT + PGDIR_BITS) != 45 +#if (PGDIR_SHIFT + PGDIR_BITS) != 43 #error Page table parameters do not cover virtual address space properly. #endif @@ -714,7 +714,7 @@ extern pgprot_t pmd_pgprot(pmd_t entry); static inline int pmd_present(pmd_t pmd) { - return pmd_val(pmd) != 0U; + return pmd_val(pmd) != 0UL; } #define pmd_none(pmd) (!pmd_val(pmd)) @@ -741,7 +741,7 @@ static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT)) static inline unsigned long __pmd_page(pmd_t pmd) { - unsigned long paddr = (unsigned long) pmd_val(pmd); + unsigned long paddr = pmd_val(pmd); #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_val(pmd) & PMD_ISHUGE) paddr &= PMD_HUGE_PADDR; @@ -751,14 +751,14 @@ static inline unsigned long __pmd_page(pmd_t pmd) } #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) #define pud_page_vaddr(pud) \ - ((unsigned long) __va((((unsigned long)pud_val(pud))< Date: Thu, 26 Sep 2013 13:45:15 -0700 Subject: sparc64: Encode huge PMDs using PTE encoding. Now that we have 64-bits for PMDs we can stop using special encodings for the huge PMD values, and just put real PTEs in there. We allocate a _PAGE_PMD_HUGE bit to distinguish between plain PMDs and huge ones. It is the same for both 4U and 4V PTE layouts. We also use _PAGE_SPECIAL to indicate the splitting state, since a huge PMD cannot also be special. All of the PMD --> PTE translation code disappears, and most of the huge PMD bit modifications and tests just degenerate into the PTE operations. In particular USER_PGTABLE_CHECK_PMD_HUGE becomes trivial. As a side effect, normal PMDs don't shift the physical address around. This also speeds up the page table walks in the TLB miss paths since they don't have to do the shifts any more. Another non-trivial aspect is that pte_modify() has to be changed to preserve the _PAGE_PMD_HUGE bits as well as the page size field of the pte. Signed-off-by: David S. Miller --- arch/sparc/include/asm/pgtable_64.h | 181 ++++++++++++++++++++++-------------- arch/sparc/include/asm/tsb.h | 92 +++--------------- arch/sparc/mm/gup.c | 9 +- arch/sparc/mm/init_64.c | 102 +------------------- arch/sparc/mm/tlb.c | 9 +- 5 files changed, 131 insertions(+), 262 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index eee803e10b92..8358dc144959 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -67,29 +67,6 @@ #error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages. #endif -/* PMDs point to PTE tables which are 4K aligned. */ -#define PMD_PADDR _AC(0xfffffffe,UL) -#define PMD_PADDR_SHIFT _AC(11,UL) - -#define PMD_ISHUGE _AC(0x00000001,UL) - -/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge - * pages, this frees up a bunch of bits in the layout that we can - * use for the protection settings and software metadata. - */ -#define PMD_HUGE_PADDR _AC(0xfffff800,UL) -#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL) -#define PMD_HUGE_PRESENT _AC(0x00000400,UL) -#define PMD_HUGE_WRITE _AC(0x00000200,UL) -#define PMD_HUGE_DIRTY _AC(0x00000100,UL) -#define PMD_HUGE_ACCESSED _AC(0x00000080,UL) -#define PMD_HUGE_EXEC _AC(0x00000040,UL) -#define PMD_HUGE_SPLITTING _AC(0x00000020,UL) - -/* PGDs point to PMD tables which are 8K aligned. */ -#define PGD_PADDR _AC(0xfffffffc,UL) -#define PGD_PADDR_SHIFT _AC(11,UL) - #ifndef __ASSEMBLY__ #include @@ -112,6 +89,7 @@ #define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ #define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ /* Advertise support for _PAGE_SPECIAL */ #define __HAVE_ARCH_PTE_SPECIAL @@ -125,6 +103,7 @@ #define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */ #define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ #define _PAGE_SPECIAL_4U _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE_4U _AC(0x0100000000000000,UL) /* Huge page */ #define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */ #define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ #define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ @@ -155,6 +134,7 @@ #define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */ #define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */ #define _PAGE_SPECIAL_4V _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE_4V _AC(0x0100000000000000,UL) /* Huge page */ #define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */ #define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */ #define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */ @@ -243,16 +223,13 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot); -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) - -extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); - -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - /* Do nothing, mk_pmd() does this part. */ - return pmd; + pte_t pte = pfn_pte(page_nr, pgprot); + + return __pmd(pte_val(pte)); } +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) #endif /* This one can be done with two shifts. */ @@ -313,14 +290,25 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | - _PAGE_SPECIAL), + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | - _PAGE_SPECIAL)); + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_modify(pte, newprot); + + return __pmd(pte_val(pte)); +} +#endif + static inline pte_t pgoff_to_pte(unsigned long off) { off <<= PAGE_SHIFT; @@ -361,7 +349,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) */ #define pgprot_noncached pgprot_noncached -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline pte_t pte_mkhuge(pte_t pte) { unsigned long mask; @@ -379,6 +367,17 @@ static inline pte_t pte_mkhuge(pte_t pte) return __pte(pte_val(pte) | mask); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkhuge(pte); + pte_val(pte) |= _PAGE_PMD_HUGE; + + return __pmd(pte_val(pte)); +} +#endif #endif static inline pte_t pte_mkdirty(pte_t pte) @@ -630,86 +629,125 @@ static inline unsigned long pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } -static inline int pmd_large(pmd_t pmd) +static inline unsigned long pmd_large(pmd_t pmd) { - return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) == - (PMD_ISHUGE | PMD_HUGE_PRESENT); + pte_t pte = __pte(pmd_val(pmd)); + + return (pte_val(pte) & _PAGE_PMD_HUGE) && pte_present(pte); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_young(pmd_t pmd) +static inline unsigned long pmd_young(pmd_t pmd) { - return pmd_val(pmd) & PMD_HUGE_ACCESSED; + pte_t pte = __pte(pmd_val(pmd)); + + return pte_young(pte); } -static inline int pmd_write(pmd_t pmd) +static inline unsigned long pmd_write(pmd_t pmd) { - return pmd_val(pmd) & PMD_HUGE_WRITE; + pte_t pte = __pte(pmd_val(pmd)); + + return pte_write(pte); } static inline unsigned long pmd_pfn(pmd_t pmd) { - unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR; + pte_t pte = __pte(pmd_val(pmd)); - return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT); + return pte_pfn(pte); } -static inline int pmd_trans_splitting(pmd_t pmd) +static inline unsigned long pmd_trans_huge(pmd_t pmd) { - return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) == - (PMD_ISHUGE|PMD_HUGE_SPLITTING); + pte_t pte = __pte(pmd_val(pmd)); + + return pte_val(pte) & _PAGE_PMD_HUGE; } -static inline int pmd_trans_huge(pmd_t pmd) +static inline unsigned long pmd_trans_splitting(pmd_t pmd) { - return pmd_val(pmd) & PMD_ISHUGE; + pte_t pte = __pte(pmd_val(pmd)); + + return pmd_trans_huge(pmd) && pte_special(pte); } #define has_transparent_hugepage() 1 static inline pmd_t pmd_mkold(pmd_t pmd) { - pmd_val(pmd) &= ~PMD_HUGE_ACCESSED; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkold(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) &= ~PMD_HUGE_WRITE; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_wrprotect(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_DIRTY; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkdirty(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkyoung(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_ACCESSED; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkyoung(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_WRITE; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkwrite(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mknotpresent(pmd_t pmd) { - pmd_val(pmd) &= ~PMD_HUGE_PRESENT; + unsigned long mask; + + if (tlb_type == hypervisor) + mask = _PAGE_PRESENT_4V; + else + mask = _PAGE_PRESENT_4U; + + pmd_val(pmd) &= ~mask; + return pmd; } static inline pmd_t pmd_mksplitting(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_SPLITTING; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkspecial(pte); + + return __pmd(pte_val(pte)); } -extern pgprot_t pmd_pgprot(pmd_t entry); +static inline pgprot_t pmd_pgprot(pmd_t entry) +{ + unsigned long val = pmd_val(entry); + + return __pgprot(val); +} #endif static inline int pmd_present(pmd_t pmd) @@ -732,26 +770,25 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT; + unsigned long val = __pa((unsigned long) (ptep)); pmd_val(*pmdp) = val; } #define pud_set(pudp, pmdp) \ - (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT)) + (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)))) static inline unsigned long __pmd_page(pmd_t pmd) { - unsigned long paddr = pmd_val(pmd); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_val(pmd) & PMD_ISHUGE) - paddr &= PMD_HUGE_PADDR; -#endif - paddr <<= PMD_PADDR_SHIFT; - return ((unsigned long) __va(paddr)); + pte_t pte = __pte(pmd_val(pmd)); + unsigned long pfn; + + pfn = pte_pfn(pte); + + return ((unsigned long) __va(pfn << PAGE_SHIFT)); } #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) #define pud_page_vaddr(pud) \ - ((unsigned long) __va((pud_val(pud)<vm_mm; spin_lock_irqsave(&mm->context.lock, flags); diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 97d1e56e9863..f1bd83019e71 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -161,8 +161,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, if (mm == &init_mm) return; - if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) { - if (pmd_val(pmd) & PMD_ISHUGE) + if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { + if (pmd_val(pmd) & _PAGE_PMD_HUGE) mm->context.huge_pte_count++; else mm->context.huge_pte_count--; @@ -178,10 +178,11 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } if (!pmd_none(orig)) { - bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0); + pte_t orig_pte = __pte(pmd_val(orig)); + bool exec = pte_exec(orig_pte); addr &= HPAGE_MASK; - if (pmd_val(orig) & PMD_ISHUGE) { + if (pmd_trans_huge(orig)) { tlb_batch_add_one(mm, addr, exec); tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); } else { -- cgit v1.2.3 From 557fc5873ef178c4b3e1e36a42db547ecdc43f9b Mon Sep 17 00:00:00 2001 From: oftedal Date: Fri, 18 Oct 2013 22:28:29 +0200 Subject: sparc: PCI: Fix incorrect address calculation of PCI Bridge windows on Simba-bridges The SIMBA APB Bridges lacks the 'ranges' of-property describing the PCI I/O and memory areas located beneath the bridge. Faking this information has been performed by reading range registers in the APB bridge, and calculating the corresponding areas. In commit 01f94c4a6ced476ce69b895426fc29bfc48c69bd ("Fix sabre pci controllers with new probing scheme.") a bug was introduced into this calculation, causing the PCI memory areas to be calculated incorrectly: The shift size was set to be identical for I/O and MEM ranges, which is incorrect. This patch set the shift size of the MEM range back to the value used before 01f94c4a6ced476ce69b895426fc29bfc48c69bd. Signed-off-by: Kjetil Oftedal Signed-off-by: David S. Miller --- arch/sparc/kernel/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index bc4d3f5d2e5d..cb021453de2a 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -398,8 +398,8 @@ static void apb_fake_ranges(struct pci_dev *dev, apb_calc_first_last(map, &first, &last); res = bus->resource[1]; res->flags = IORESOURCE_MEM; - region.start = (first << 21); - region.end = (last << 21) + ((1 << 21) - 1); + region.start = (first << 29); + region.end = (last << 29) + ((1 << 29) - 1); pcibios_bus_to_resource(dev, res, ®ion); } -- cgit v1.2.3 From 1a36265bf7d7e16b35fab5ae7f2f5d499bd0c14d Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Sat, 14 Sep 2013 16:00:09 +0400 Subject: sparc64: Add self-IPI support for smp_send_reschedule() CONFIG_NO_HZ_FULL requires possibility of smp_send_reschedule() for the calling CPU. Currently, it is used in inc_nr_running() scheduler primitive only. Nobody calls smp_send_reschedule() from preemptible context (furthermore, it looks like it will be save if anybody use it another way in the future). But anyway I add WARN_ON() here just to return here if anything changes. Signed-off-by: Kirill Tkhai CC: David Miller Signed-off-by: David S. Miller --- arch/sparc/kernel/smp_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e142545244f2..b66a5338231e 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1399,8 +1399,13 @@ void __init smp_cpus_done(unsigned int max_cpus) void smp_send_reschedule(int cpu) { - xcall_deliver((u64) &xcall_receive_signal, 0, 0, - cpumask_of(cpu)); + if (cpu == smp_processor_id()) { + WARN_ON_ONCE(preemptible()); + set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); + } else { + xcall_deliver((u64) &xcall_receive_signal, + 0, 0, cpumask_of(cpu)); + } } void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) -- cgit v1.2.3 From 812cb83a56a908729c453a7db3fb2c262119bc9d Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Sat, 14 Sep 2013 16:02:11 +0400 Subject: sparc64: Implement HAVE_CONTEXT_TRACKING Mark the places when the system are in user or are in kernel. This is used to make full dynticks system (tickless) -- CONFIG_NO_HZ_FULL dependence. Signed-off-by: Kirill Tkhai CC: David Miller Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/thread_info_64.h | 3 +- arch/sparc/kernel/entry.h | 1 - arch/sparc/kernel/kgdb_64.c | 5 +- arch/sparc/kernel/kprobes.c | 7 ++- arch/sparc/kernel/process_64.c | 2 + arch/sparc/kernel/ptrace_64.c | 10 ++++ arch/sparc/kernel/rtrap_64.S | 8 +++- arch/sparc/kernel/signal_64.c | 13 ++++- arch/sparc/kernel/sys_sparc_64.c | 3 ++ arch/sparc/kernel/syscalls.S | 8 ++-- arch/sparc/kernel/traps_64.c | 85 ++++++++++++++++++++++++++------- arch/sparc/kernel/unaligned_64.c | 16 +++++-- arch/sparc/mm/fault_64.c | 14 ++++-- 14 files changed, 137 insertions(+), 39 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 78c4fdb91bc5..01d77d78cd32 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -64,6 +64,7 @@ config SPARC64 select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS + select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_KMEMLEAK select RTC_DRV_CMOS select RTC_DRV_BQ4802 diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index d5e504251079..5d9292ab1077 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -192,7 +192,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */ /* flag bit 6 is available */ #define TIF_32BIT 7 /* 32-bit binary */ -/* flag bit 8 is available */ +#define TIF_NOHZ 8 /* in adaptive nohz mode */ #define TIF_SECCOMP 9 /* secure computing */ #define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ @@ -210,6 +210,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define _TIF_NEED_RESCHED (1< #include #include +#include #include #include #include @@ -418,12 +419,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, asmlinkage void __kprobes kprobe_trap(unsigned long trap_level, struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + BUG_ON(trap_level != 0x170 && trap_level != 0x171); if (user_mode(regs)) { local_irq_enable(); bad_trap(regs, trap_level); - return; + goto out; } /* trap_level == 0x170 --> ta 0x70 @@ -433,6 +436,8 @@ asmlinkage void __kprobes kprobe_trap(unsigned long trap_level, (trap_level == 0x170) ? "debug" : "debug_2", regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP) bad_trap(regs, trap_level); +out: + exception_exit(prev_state); } /* Jprobes support. */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index baebab215492..32a280ec38c1 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -557,6 +558,7 @@ void fault_in_user_windows(void) barf: set_thread_wsaved(window + 1); + user_exit(); do_exit(SIGILL); } diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 773c1f2983ce..c13c9f25d83a 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1066,6 +1067,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) /* do the secure computing check first */ secure_computing_strict(regs->u_regs[UREG_G1]); + if (test_thread_flag(TIF_NOHZ)) + user_exit(); + if (test_thread_flag(TIF_SYSCALL_TRACE)) ret = tracehook_report_syscall_entry(regs); @@ -1086,6 +1090,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) asmlinkage void syscall_trace_leave(struct pt_regs *regs) { + if (test_thread_flag(TIF_NOHZ)) + user_exit(); + audit_syscall_exit(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) @@ -1093,4 +1100,7 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); + + if (test_thread_flag(TIF_NOHZ)) + user_enter(); } diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index afa2a9e3d0a0..a954eb81881b 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -18,10 +18,16 @@ #define RTRAP_PSTATE_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV) #define RTRAP_PSTATE_AG_IRQOFF (PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) +#ifdef CONFIG_CONTEXT_TRACKING +# define SCHEDULE_USER schedule_user +#else +# define SCHEDULE_USER schedule +#endif + .text .align 32 __handle_preemption: - call schedule + call SCHEDULE_USER wrpr %g0, RTRAP_PSTATE, %pstate ba,pt %xcc, __handle_preemption_continue wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 35923e8abd82..cd91d010e6d3 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) { struct ucontext __user *ucp = (struct ucontext __user *) regs->u_regs[UREG_I0]; + enum ctx_state prev_state = exception_enter(); mc_gregset_t __user *grp; unsigned long pc, npc, tstate; unsigned long fp, i7; @@ -129,16 +131,19 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs) } if (err) goto do_sigsegv; - +out: + exception_exit(prev_state); return; do_sigsegv: force_sig(SIGSEGV, current); + goto out; } asmlinkage void sparc64_get_context(struct pt_regs *regs) { struct ucontext __user *ucp = (struct ucontext __user *) regs->u_regs[UREG_I0]; + enum ctx_state prev_state = exception_enter(); mc_gregset_t __user *grp; mcontext_t __user *mcp; unsigned long fp, i7; @@ -220,10 +225,12 @@ asmlinkage void sparc64_get_context(struct pt_regs *regs) } if (err) goto do_sigsegv; - +out: + exception_exit(prev_state); return; do_sigsegv: force_sig(SIGSEGV, current); + goto out; } struct rt_signal_frame { @@ -528,11 +535,13 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags) { + user_exit(); if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs, orig_i0); if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + user_enter(); } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index d05eb9c1d846..beb0b5a5f21f 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -496,6 +497,7 @@ asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs) asmlinkage void sparc_breakpoint(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); siginfo_t info; if (test_thread_flag(TIF_32BIT)) { @@ -514,6 +516,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs) #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc); #endif + exception_exit(prev_state); } extern void check_pending(int signum); diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index d950197a17e1..87729fff13b9 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -52,7 +52,7 @@ sys32_rt_sigreturn: #endif .align 32 1: ldx [%g6 + TI_FLAGS], %l5 - andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0 be,pt %icc, rtrap nop call syscall_trace_leave @@ -184,7 +184,7 @@ linux_sparc_syscall32: srl %i3, 0, %o3 ! IEU0 srl %i2, 0, %o2 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0 bne,pn %icc, linux_syscall_trace32 ! CTI mov %i0, %l5 ! IEU1 5: call %l7 ! CTI Group brk forced @@ -207,7 +207,7 @@ linux_sparc_syscall: mov %i3, %o3 ! IEU1 mov %i4, %o4 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0 bne,pn %icc, linux_syscall_trace ! CTI Group mov %i0, %l5 ! IEU0 2: call %l7 ! CTI Group brk forced @@ -223,7 +223,7 @@ ret_sys_call: cmp %o0, -ERESTART_RESTARTBLOCK bgeu,pn %xcc, 1f - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0 ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc 2: diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index b3f833ab90eb..4ced92f05358 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -186,11 +187,12 @@ EXPORT_SYMBOL_GPL(unregister_dimm_printer); void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { + enum ctx_state prev_state = exception_enter(); siginfo_t info; if (notify_die(DIE_TRAP, "instruction access exception", regs, 0, 0x8, SIGTRAP) == NOTIFY_STOP) - return; + goto out; if (regs->tstate & TSTATE_PRIV) { printk("spitfire_insn_access_exception: SFSR[%016lx] " @@ -207,6 +209,8 @@ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, un info.si_addr = (void __user *)regs->tpc; info.si_trapno = 0; force_sig_info(SIGSEGV, &info, current); +out: + exception_exit(prev_state); } void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) @@ -260,11 +264,12 @@ void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, u void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { + enum ctx_state prev_state = exception_enter(); siginfo_t info; if (notify_die(DIE_TRAP, "data access exception", regs, 0, 0x30, SIGTRAP) == NOTIFY_STOP) - return; + goto out; if (regs->tstate & TSTATE_PRIV) { /* Test if this comes from uaccess places. */ @@ -280,7 +285,7 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un #endif regs->tpc = entry->fixup; regs->tnpc = regs->tpc + 4; - return; + goto out; } /* Shit... */ printk("spitfire_data_access_exception: SFSR[%016lx] " @@ -294,6 +299,8 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un info.si_addr = (void __user *)sfar; info.si_trapno = 0; force_sig_info(SIGSEGV, &info, current); +out: + exception_exit(prev_state); } void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) @@ -1994,6 +2001,7 @@ static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, */ void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) { + enum ctx_state prev_state = exception_enter(); struct sun4v_error_entry *ent, local_copy; struct trap_per_cpu *tb; unsigned long paddr; @@ -2022,12 +2030,14 @@ void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) pr_info("Shutdown request, %u seconds...\n", local_copy.err_secs); orderly_poweroff(true); - return; + goto out; } sun4v_log_error(regs, &local_copy, cpu, KERN_ERR "RESUMABLE ERROR", &sun4v_resum_oflow_cnt); +out: + exception_exit(prev_state); } /* If we try to printk() we'll probably make matters worse, by trying @@ -2152,7 +2162,7 @@ void hypervisor_tlbop_error_xcall(unsigned long err, unsigned long op) err, op); } -void do_fpe_common(struct pt_regs *regs) +static void do_fpe_common(struct pt_regs *regs) { if (regs->tstate & TSTATE_PRIV) { regs->tpc = regs->tnpc; @@ -2188,23 +2198,28 @@ void do_fpe_common(struct pt_regs *regs) void do_fpieee(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); + if (notify_die(DIE_TRAP, "fpu exception ieee", regs, 0, 0x24, SIGFPE) == NOTIFY_STOP) - return; + goto out; do_fpe_common(regs); +out: + exception_exit(prev_state); } extern int do_mathemu(struct pt_regs *, struct fpustate *, bool); void do_fpother(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); struct fpustate *f = FPUSTATE; int ret = 0; if (notify_die(DIE_TRAP, "fpu exception other", regs, 0, 0x25, SIGFPE) == NOTIFY_STOP) - return; + goto out; switch ((current_thread_info()->xfsr[0] & 0x1c000)) { case (2 << 14): /* unfinished_FPop */ @@ -2213,17 +2228,20 @@ void do_fpother(struct pt_regs *regs) break; } if (ret) - return; + goto out; do_fpe_common(regs); +out: + exception_exit(prev_state); } void do_tof(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); siginfo_t info; if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs, 0, 0x26, SIGEMT) == NOTIFY_STOP) - return; + goto out; if (regs->tstate & TSTATE_PRIV) die_if_kernel("Penguin overflow trap from kernel mode", regs); @@ -2237,15 +2255,18 @@ void do_tof(struct pt_regs *regs) info.si_addr = (void __user *)regs->tpc; info.si_trapno = 0; force_sig_info(SIGEMT, &info, current); +out: + exception_exit(prev_state); } void do_div0(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); siginfo_t info; if (notify_die(DIE_TRAP, "integer division by zero", regs, 0, 0x28, SIGFPE) == NOTIFY_STOP) - return; + goto out; if (regs->tstate & TSTATE_PRIV) die_if_kernel("TL0: Kernel divide by zero.", regs); @@ -2259,6 +2280,8 @@ void do_div0(struct pt_regs *regs) info.si_addr = (void __user *)regs->tpc; info.si_trapno = 0; force_sig_info(SIGFPE, &info, current); +out: + exception_exit(prev_state); } static void instruction_dump(unsigned int *pc) @@ -2415,6 +2438,7 @@ extern int handle_ldf_stq(u32 insn, struct pt_regs *regs); void do_illegal_instruction(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); unsigned long pc = regs->tpc; unsigned long tstate = regs->tstate; u32 insn; @@ -2422,7 +2446,7 @@ void do_illegal_instruction(struct pt_regs *regs) if (notify_die(DIE_TRAP, "illegal instruction", regs, 0, 0x10, SIGILL) == NOTIFY_STOP) - return; + goto out; if (tstate & TSTATE_PRIV) die_if_kernel("Kernel illegal instruction", regs); @@ -2431,14 +2455,14 @@ void do_illegal_instruction(struct pt_regs *regs) if (get_user(insn, (u32 __user *) pc) != -EFAULT) { if ((insn & 0xc1ffc000) == 0x81700000) /* POPC */ { if (handle_popc(insn, regs)) - return; + goto out; } else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ { if (handle_ldf_stq(insn, regs)) - return; + goto out; } else if (tlb_type == hypervisor) { if ((insn & VIS_OPCODE_MASK) == VIS_OPCODE_VAL) { if (!vis_emul(regs, insn)) - return; + goto out; } else { struct fpustate *f = FPUSTATE; @@ -2448,7 +2472,7 @@ void do_illegal_instruction(struct pt_regs *regs) * Trap in the %fsr to unimplemented_FPop. */ if (do_mathemu(regs, f, true)) - return; + goto out; } } } @@ -2458,21 +2482,24 @@ void do_illegal_instruction(struct pt_regs *regs) info.si_addr = (void __user *)pc; info.si_trapno = 0; force_sig_info(SIGILL, &info, current); +out: + exception_exit(prev_state); } extern void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn); void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) { + enum ctx_state prev_state = exception_enter(); siginfo_t info; if (notify_die(DIE_TRAP, "memory address unaligned", regs, 0, 0x34, SIGSEGV) == NOTIFY_STOP) - return; + goto out; if (regs->tstate & TSTATE_PRIV) { kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc)); - return; + goto out; } info.si_signo = SIGBUS; info.si_errno = 0; @@ -2480,6 +2507,8 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo info.si_addr = (void __user *)sfar; info.si_trapno = 0; force_sig_info(SIGBUS, &info, current); +out: + exception_exit(prev_state); } void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) @@ -2504,11 +2533,12 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c void do_privop(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); siginfo_t info; if (notify_die(DIE_TRAP, "privileged operation", regs, 0, 0x11, SIGILL) == NOTIFY_STOP) - return; + goto out; if (test_thread_flag(TIF_32BIT)) { regs->tpc &= 0xffffffff; @@ -2520,6 +2550,8 @@ void do_privop(struct pt_regs *regs) info.si_addr = (void __user *)regs->tpc; info.si_trapno = 0; force_sig_info(SIGILL, &info, current); +out: + exception_exit(prev_state); } void do_privact(struct pt_regs *regs) @@ -2530,99 +2562,116 @@ void do_privact(struct pt_regs *regs) /* Trap level 1 stuff or other traps we should never see... */ void do_cee(struct pt_regs *regs) { + exception_enter(); die_if_kernel("TL0: Cache Error Exception", regs); } void do_cee_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: Cache Error Exception", regs); } void do_dae_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: Data Access Exception", regs); } void do_iae_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: Instruction Access Exception", regs); } void do_div0_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: DIV0 Exception", regs); } void do_fpdis_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: FPU Disabled", regs); } void do_fpieee_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: FPU IEEE Exception", regs); } void do_fpother_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: FPU Other Exception", regs); } void do_ill_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: Illegal Instruction Exception", regs); } void do_irq_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: IRQ Exception", regs); } void do_lddfmna_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: LDDF Exception", regs); } void do_stdfmna_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: STDF Exception", regs); } void do_paw(struct pt_regs *regs) { + exception_enter(); die_if_kernel("TL0: Phys Watchpoint Exception", regs); } void do_paw_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: Phys Watchpoint Exception", regs); } void do_vaw(struct pt_regs *regs) { + exception_enter(); die_if_kernel("TL0: Virt Watchpoint Exception", regs); } void do_vaw_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: Virt Watchpoint Exception", regs); } void do_tof_tl1(struct pt_regs *regs) { + exception_enter(); dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); die_if_kernel("TL1: Tag Overflow Exception", regs); } diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 8201c25e7669..3c1a7cb31579 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -21,9 +21,12 @@ #include #include #include +#include #include #include +#include "entry.h" + enum direction { load, /* ld, ldd, ldh, ldsh */ store, /* st, std, sth, stsh */ @@ -418,9 +421,6 @@ int handle_popc(u32 insn, struct pt_regs *regs) extern void do_fpother(struct pt_regs *regs); extern void do_privact(struct pt_regs *regs); -extern void spitfire_data_access_exception(struct pt_regs *regs, - unsigned long sfsr, - unsigned long sfar); extern void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx); @@ -578,6 +578,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs) void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) { + enum ctx_state prev_state = exception_enter(); unsigned long pc = regs->tpc; unsigned long tstate = regs->tstate; u32 insn; @@ -632,13 +633,16 @@ daex: sun4v_data_access_exception(regs, sfar, sfsr); else spitfire_data_access_exception(regs, sfsr, sfar); - return; + goto out; } advance(regs); +out: + exception_exit(prev_state); } void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) { + enum ctx_state prev_state = exception_enter(); unsigned long pc = regs->tpc; unsigned long tstate = regs->tstate; u32 insn; @@ -680,7 +684,9 @@ daex: sun4v_data_access_exception(regs, sfar, sfsr); else spitfire_data_access_exception(regs, sfsr, sfar); - return; + goto out; } advance(regs); +out: + exception_exit(prev_state); } diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 2ebec263d685..69bb818fdd79 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -272,6 +273,7 @@ static void noinline __kprobes bogus_32bit_fault_address(struct pt_regs *regs, asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned int insn = 0; @@ -282,7 +284,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) fault_code = get_thread_fault_code(); if (notify_page_fault(regs)) - return; + goto exit_exception; si_code = SEGV_MAPERR; address = current_thread_info()->fault_address; @@ -313,7 +315,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) /* Valid, no problems... */ } else { bad_kernel_pc(regs, address); - return; + goto exit_exception; } } else flags |= FAULT_FLAG_USER; @@ -430,7 +432,7 @@ good_area: fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) - return; + goto exit_exception; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) @@ -482,6 +484,8 @@ good_area: } #endif +exit_exception: + exception_exit(prev_state); return; /* @@ -494,7 +498,7 @@ bad_area: handle_kernel_fault: do_kernel_fault(regs, si_code, fault_code, insn, address); - return; + goto exit_exception; /* * We ran out of memory, or some other thing happened to us that made @@ -505,7 +509,7 @@ out_of_memory: up_read(&mm->mmap_sem); if (!(regs->tstate & TSTATE_PRIV)) { pagefault_out_of_memory(); - return; + goto exit_exception; } goto handle_kernel_fault; -- cgit v1.2.3